diff --git "a/wandb/run-20220731_183904-2sjxhpmt/files/wandb-summary.json" "b/wandb/run-20220731_183904-2sjxhpmt/files/wandb-summary.json" --- "a/wandb/run-20220731_183904-2sjxhpmt/files/wandb-summary.json" +++ "b/wandb/run-20220731_183904-2sjxhpmt/files/wandb-summary.json" @@ -1 +1 @@ -{"train/grad_norm": 5.40625, "layer_grad_norm/": {"lm_head": {"bias": 0.0277099609375, "kernel": 4.8125}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.037841796875, "scale": 0.04345703125}, "layers": {"0": {"attention": {"k_proj": {"bias": 6.008148193359375e-05, "kernel": 0.017822265625}, "out_proj": {"bias": 0.0185546875, "kernel": 0.13671875}, "q_proj": {"bias": 0.0017547607421875, "kernel": 0.0159912109375}, "v_proj": {"bias": 0.022705078125, "kernel": 0.115234375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0294189453125, "kernel": 0.34765625}, "output_dense": {"bias": 0.00616455078125, "kernel": 0.265625}}, "final_layer_norm": {"bias": 0.12060546875, "scale": 0.24609375}, "layer_norm": {"bias": 0.053955078125, "scale": 0.06787109375}}, "1": {"attention": {"k_proj": {"bias": 3.266334533691406e-05, "kernel": 0.0201416015625}, "out_proj": {"bias": 0.00665283203125, "kernel": 0.05908203125}, "q_proj": {"bias": 0.00213623046875, "kernel": 0.0223388671875}, "v_proj": {"bias": 0.012451171875, "kernel": 0.04541015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.01220703125, "kernel": 0.1845703125}, "output_dense": {"bias": 0.0062255859375, "kernel": 0.126953125}}, "final_layer_norm": {"bias": 0.0283203125, "scale": 0.024658203125}, "layer_norm": {"bias": 0.03515625, "scale": 0.037109375}}, "10": {"attention": {"k_proj": {"bias": 3.838539123535156e-05, "kernel": 0.0732421875}, "out_proj": {"bias": 0.004608154296875, "kernel": 0.08203125}, "q_proj": {"bias": 0.005645751953125, "kernel": 0.06640625}, "v_proj": {"bias": 0.00860595703125, "kernel": 0.072265625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0087890625, "kernel": 0.123046875}, "output_dense": {"bias": 0.00439453125, "kernel": 0.0947265625}}, "final_layer_norm": {"bias": 0.018310546875, "scale": 0.0185546875}, "layer_norm": {"bias": 0.028076171875, "scale": 0.032958984375}}, "11": {"attention": {"k_proj": {"bias": 7.2479248046875e-05, "kernel": 0.0703125}, "out_proj": {"bias": 0.004547119140625, "kernel": 0.119140625}, "q_proj": {"bias": 0.00531005859375, "kernel": 0.068359375}, "v_proj": {"bias": 0.009521484375, "kernel": 0.109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00823974609375, "kernel": 0.1201171875}, "output_dense": {"bias": 0.00439453125, "kernel": 0.087890625}}, "final_layer_norm": {"bias": 0.0172119140625, "scale": 0.0185546875}, "layer_norm": {"bias": 0.028076171875, "scale": 0.04931640625}}, "12": {"attention": {"k_proj": {"bias": 4.839897155761719e-05, "kernel": 0.06787109375}, "out_proj": {"bias": 0.00457763671875, "kernel": 0.09228515625}, "q_proj": {"bias": 0.0057373046875, "kernel": 0.0712890625}, "v_proj": {"bias": 0.00897216796875, "kernel": 0.083984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00885009765625, "kernel": 0.1220703125}, "output_dense": {"bias": 0.004425048828125, "kernel": 0.08837890625}}, "final_layer_norm": {"bias": 0.0186767578125, "scale": 0.0166015625}, "layer_norm": {"bias": 0.02734375, "scale": 0.044921875}}, "13": {"attention": {"k_proj": {"bias": 7.62939453125e-05, "kernel": 0.0810546875}, "out_proj": {"bias": 0.0045166015625, "kernel": 0.1220703125}, "q_proj": {"bias": 0.00653076171875, "kernel": 0.08447265625}, "v_proj": {"bias": 0.009521484375, "kernel": 0.11328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00860595703125, "kernel": 0.1201171875}, "output_dense": {"bias": 0.004425048828125, "kernel": 0.0908203125}}, "final_layer_norm": {"bias": 0.018798828125, "scale": 0.0162353515625}, "layer_norm": {"bias": 0.0274658203125, "scale": 0.0361328125}}, "14": {"attention": {"k_proj": {"bias": 0.00010013580322265625, "kernel": 0.052734375}, "out_proj": {"bias": 0.0045166015625, "kernel": 0.0966796875}, "q_proj": {"bias": 0.0040283203125, "kernel": 0.05224609375}, "v_proj": {"bias": 0.008544921875, "kernel": 0.0869140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.00933837890625, "kernel": 0.130859375}, "output_dense": {"bias": 0.00445556640625, "kernel": 0.0986328125}}, "final_layer_norm": {"bias": 0.021728515625, "scale": 0.0228271484375}, "layer_norm": {"bias": 0.02099609375, "scale": 0.0198974609375}}, "15": {"attention": {"k_proj": {"bias": 0.00022220611572265625, "kernel": 0.1162109375}, "out_proj": {"bias": 0.00439453125, "kernel": 0.1953125}, "q_proj": {"bias": 0.00830078125, "kernel": 0.10791015625}, "v_proj": {"bias": 0.009765625, "kernel": 0.1474609375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0087890625, "kernel": 0.1259765625}, "output_dense": {"bias": 0.004302978515625, "kernel": 0.109375}}, "final_layer_norm": {"bias": 0.0184326171875, "scale": 0.0186767578125}, "layer_norm": {"bias": 0.0294189453125, "scale": 0.03173828125}}, "16": {"attention": {"k_proj": {"bias": 0.00010395050048828125, "kernel": 0.0791015625}, "out_proj": {"bias": 0.00439453125, "kernel": 0.12890625}, "q_proj": {"bias": 0.005767822265625, "kernel": 0.0732421875}, "v_proj": {"bias": 0.0084228515625, "kernel": 0.1025390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.00830078125, "kernel": 0.12109375}, "output_dense": {"bias": 0.0042724609375, "kernel": 0.10107421875}}, "final_layer_norm": {"bias": 0.017578125, "scale": 0.01483154296875}, "layer_norm": {"bias": 0.024169921875, "scale": 0.0274658203125}}, "17": {"attention": {"k_proj": {"bias": 2.682209014892578e-05, "kernel": 0.0673828125}, "out_proj": {"bias": 0.00445556640625, "kernel": 0.072265625}, "q_proj": {"bias": 0.00457763671875, "kernel": 0.057861328125}, "v_proj": {"bias": 0.00836181640625, "kernel": 0.0703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00830078125, "kernel": 0.1201171875}, "output_dense": {"bias": 0.004302978515625, "kernel": 0.1025390625}}, "final_layer_norm": {"bias": 0.017578125, "scale": 0.0162353515625}, "layer_norm": {"bias": 0.0235595703125, "scale": 0.025390625}}, "18": {"attention": {"k_proj": {"bias": 0.00011491775512695312, "kernel": 0.0869140625}, "out_proj": {"bias": 0.00433349609375, "kernel": 0.13671875}, "q_proj": {"bias": 0.00555419921875, "kernel": 0.08251953125}, "v_proj": {"bias": 0.008544921875, "kernel": 0.1103515625}}, "feed_forward": {"intermediate_dense": {"bias": 0.008056640625, "kernel": 0.12109375}, "output_dense": {"bias": 0.004241943359375, "kernel": 0.1005859375}}, "final_layer_norm": {"bias": 0.0164794921875, "scale": 0.016357421875}, "layer_norm": {"bias": 0.02392578125, "scale": 0.023681640625}}, "19": {"attention": {"k_proj": {"bias": 5.435943603515625e-05, "kernel": 0.0556640625}, "out_proj": {"bias": 0.0042724609375, "kernel": 0.0869140625}, "q_proj": {"bias": 0.00433349609375, "kernel": 0.06298828125}, "v_proj": {"bias": 0.0081787109375, "kernel": 0.0771484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00799560546875, "kernel": 0.12451171875}, "output_dense": {"bias": 0.004180908203125, "kernel": 0.10205078125}}, "final_layer_norm": {"bias": 0.01611328125, "scale": 0.019287109375}, "layer_norm": {"bias": 0.02294921875, "scale": 0.022705078125}}, "2": {"attention": {"k_proj": {"bias": 4.1484832763671875e-05, "kernel": 0.033935546875}, "out_proj": {"bias": 0.00677490234375, "kernel": 0.0810546875}, "q_proj": {"bias": 0.0035858154296875, "kernel": 0.035400390625}, "v_proj": {"bias": 0.013916015625, "kernel": 0.068359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01287841796875, "kernel": 0.205078125}, "output_dense": {"bias": 0.006195068359375, "kernel": 0.12890625}}, "final_layer_norm": {"bias": 0.028076171875, "scale": 0.0228271484375}, "layer_norm": {"bias": 0.037353515625, "scale": 0.05615234375}}, "20": {"attention": {"k_proj": {"bias": 1.3053417205810547e-05, "kernel": 0.03173828125}, "out_proj": {"bias": 0.00439453125, "kernel": 0.046875}, "q_proj": {"bias": 0.002197265625, "kernel": 0.03369140625}, "v_proj": {"bias": 0.0076904296875, "kernel": 0.043701171875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0081787109375, "kernel": 0.1318359375}, "output_dense": {"bias": 0.0042724609375, "kernel": 0.1044921875}}, "final_layer_norm": {"bias": 0.016357421875, "scale": 0.01611328125}, "layer_norm": {"bias": 0.0181884765625, "scale": 0.012939453125}}, "21": {"attention": {"k_proj": {"bias": 4.4345855712890625e-05, "kernel": 0.052490234375}, "out_proj": {"bias": 0.00439453125, "kernel": 0.0859375}, "q_proj": {"bias": 0.0037384033203125, "kernel": 0.05224609375}, "v_proj": {"bias": 0.0081787109375, "kernel": 0.0751953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.008544921875, "kernel": 0.140625}, "output_dense": {"bias": 0.004302978515625, "kernel": 0.10693359375}}, "final_layer_norm": {"bias": 0.017578125, "scale": 0.022705078125}, "layer_norm": {"bias": 0.02001953125, "scale": 0.0274658203125}}, "22": {"attention": {"k_proj": {"bias": 3.552436828613281e-05, "kernel": 0.07421875}, "out_proj": {"bias": 0.00445556640625, "kernel": 0.0732421875}, "q_proj": {"bias": 0.005126953125, "kernel": 0.072265625}, "v_proj": {"bias": 0.0086669921875, "kernel": 0.068359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00872802734375, "kernel": 0.14453125}, "output_dense": {"bias": 0.00439453125, "kernel": 0.1083984375}}, "final_layer_norm": {"bias": 0.0185546875, "scale": 0.0234375}, "layer_norm": {"bias": 0.024169921875, "scale": 0.023681640625}}, "23": {"attention": {"k_proj": {"bias": 0.00016021728515625, "kernel": 0.1103515625}, "out_proj": {"bias": 0.004364013671875, "kernel": 0.16015625}, "q_proj": {"bias": 0.0064697265625, "kernel": 0.10595703125}, "v_proj": {"bias": 0.0098876953125, "kernel": 0.142578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00872802734375, "kernel": 0.15234375}, "output_dense": {"bias": 0.004302978515625, "kernel": 0.11181640625}}, "final_layer_norm": {"bias": 0.01904296875, "scale": 0.027587890625}, "layer_norm": {"bias": 0.0283203125, "scale": 0.030517578125}}, "24": {"attention": {"k_proj": {"bias": 9.870529174804688e-05, "kernel": 0.125}, "out_proj": {"bias": 0.004150390625, "kernel": 0.1494140625}, "q_proj": {"bias": 0.00823974609375, "kernel": 0.126953125}, "v_proj": {"bias": 0.0096435546875, "kernel": 0.1376953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0086669921875, "kernel": 0.154296875}, "output_dense": {"bias": 0.0040283203125, "kernel": 0.0986328125}}, "final_layer_norm": {"bias": 0.018798828125, "scale": 0.030029296875}, "layer_norm": {"bias": 0.03125, "scale": 0.0380859375}}, "25": {"attention": {"k_proj": {"bias": 6.818771362304688e-05, "kernel": 0.09130859375}, "out_proj": {"bias": 0.004150390625, "kernel": 0.1162109375}, "q_proj": {"bias": 0.00604248046875, "kernel": 0.0927734375}, "v_proj": {"bias": 0.00909423828125, "kernel": 0.111328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0087890625, "kernel": 0.16015625}, "output_dense": {"bias": 0.0040283203125, "kernel": 0.1005859375}}, "final_layer_norm": {"bias": 0.0205078125, "scale": 0.024169921875}, "layer_norm": {"bias": 0.0263671875, "scale": 0.03466796875}}, "26": {"attention": {"k_proj": {"bias": 7.534027099609375e-05, "kernel": 0.10546875}, "out_proj": {"bias": 0.0040283203125, "kernel": 0.1103515625}, "q_proj": {"bias": 0.006744384765625, "kernel": 0.1064453125}, "v_proj": {"bias": 0.0091552734375, "kernel": 0.10400390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.008056640625, "kernel": 0.1396484375}, "output_dense": {"bias": 0.003936767578125, "kernel": 0.0966796875}}, "final_layer_norm": {"bias": 0.018310546875, "scale": 0.0223388671875}, "layer_norm": {"bias": 0.027099609375, "scale": 0.02978515625}}, "27": {"attention": {"k_proj": {"bias": 0.00014591217041015625, "kernel": 0.146484375}, "out_proj": {"bias": 0.003753662109375, "kernel": 0.158203125}, "q_proj": {"bias": 0.0093994140625, "kernel": 0.146484375}, "v_proj": {"bias": 0.009765625, "kernel": 0.1533203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00823974609375, "kernel": 0.142578125}, "output_dense": {"bias": 0.0036773681640625, "kernel": 0.0966796875}}, "final_layer_norm": {"bias": 0.0191650390625, "scale": 0.0233154296875}, "layer_norm": {"bias": 0.03466796875, "scale": 0.0478515625}}, "28": {"attention": {"k_proj": {"bias": 0.00011873245239257812, "kernel": 0.1318359375}, "out_proj": {"bias": 0.0034637451171875, "kernel": 0.146484375}, "q_proj": {"bias": 0.00848388671875, "kernel": 0.13671875}, "v_proj": {"bias": 0.0089111328125, "kernel": 0.142578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00732421875, "kernel": 0.1298828125}, "output_dense": {"bias": 0.0034332275390625, "kernel": 0.09228515625}}, "final_layer_norm": {"bias": 0.0162353515625, "scale": 0.0233154296875}, "layer_norm": {"bias": 0.031982421875, "scale": 0.051025390625}}, "29": {"attention": {"k_proj": {"bias": 0.00011110305786132812, "kernel": 0.140625}, "out_proj": {"bias": 0.00335693359375, "kernel": 0.13671875}, "q_proj": {"bias": 0.00830078125, "kernel": 0.1484375}, "v_proj": {"bias": 0.00872802734375, "kernel": 0.1337890625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0078125, "kernel": 0.146484375}, "output_dense": {"bias": 0.003265380859375, "kernel": 0.095703125}}, "final_layer_norm": {"bias": 0.017333984375, "scale": 0.01806640625}, "layer_norm": {"bias": 0.031005859375, "scale": 0.038330078125}}, "3": {"attention": {"k_proj": {"bias": 9.72747802734375e-05, "kernel": 0.0634765625}, "out_proj": {"bias": 0.006439208984375, "kernel": 0.150390625}, "q_proj": {"bias": 0.00604248046875, "kernel": 0.058837890625}, "v_proj": {"bias": 0.012451171875, "kernel": 0.1142578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0128173828125, "kernel": 0.1875}, "output_dense": {"bias": 0.005889892578125, "kernel": 0.12353515625}}, "final_layer_norm": {"bias": 0.02978515625, "scale": 0.0286865234375}, "layer_norm": {"bias": 0.034423828125, "scale": 0.04296875}}, "30": {"attention": {"k_proj": {"bias": 0.0001430511474609375, "kernel": 0.125}, "out_proj": {"bias": 0.003173828125, "kernel": 0.134765625}, "q_proj": {"bias": 0.00738525390625, "kernel": 0.12890625}, "v_proj": {"bias": 0.0084228515625, "kernel": 0.142578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.007720947265625, "kernel": 0.1513671875}, "output_dense": {"bias": 0.0030517578125, "kernel": 0.0888671875}}, "final_layer_norm": {"bias": 0.0172119140625, "scale": 0.0164794921875}, "layer_norm": {"bias": 0.0281982421875, "scale": 0.033203125}}, "31": {"attention": {"k_proj": {"bias": 0.0001888275146484375, "kernel": 0.1103515625}, "out_proj": {"bias": 0.0029449462890625, "kernel": 0.10986328125}, "q_proj": {"bias": 0.006134033203125, "kernel": 0.1083984375}, "v_proj": {"bias": 0.00732421875, "kernel": 0.119140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0072021484375, "kernel": 0.138671875}, "output_dense": {"bias": 0.002838134765625, "kernel": 0.0810546875}}, "final_layer_norm": {"bias": 0.0164794921875, "scale": 0.0177001953125}, "layer_norm": {"bias": 0.02392578125, "scale": 0.027587890625}}, "32": {"attention": {"k_proj": {"bias": 0.00012969970703125, "kernel": 0.1123046875}, "out_proj": {"bias": 0.00274658203125, "kernel": 0.1025390625}, "q_proj": {"bias": 0.006500244140625, "kernel": 0.11376953125}, "v_proj": {"bias": 0.00726318359375, "kernel": 0.11962890625}}, "feed_forward": {"intermediate_dense": {"bias": 0.006591796875, "kernel": 0.126953125}, "output_dense": {"bias": 0.002593994140625, "kernel": 0.0712890625}}, "final_layer_norm": {"bias": 0.015869140625, "scale": 0.017333984375}, "layer_norm": {"bias": 0.025634765625, "scale": 0.0361328125}}, "33": {"attention": {"k_proj": {"bias": 0.00012493133544921875, "kernel": 0.1240234375}, "out_proj": {"bias": 0.002471923828125, "kernel": 0.0966796875}, "q_proj": {"bias": 0.00726318359375, "kernel": 0.1259765625}, "v_proj": {"bias": 0.00653076171875, "kernel": 0.107421875}}, "feed_forward": {"intermediate_dense": {"bias": 0.006103515625, "kernel": 0.11328125}, "output_dense": {"bias": 0.0023651123046875, "kernel": 0.06396484375}}, "final_layer_norm": {"bias": 0.01611328125, "scale": 0.01422119140625}, "layer_norm": {"bias": 0.025390625, "scale": 0.0289306640625}}, "34": {"attention": {"k_proj": {"bias": 0.0002727508544921875, "kernel": 0.12353515625}, "out_proj": {"bias": 0.0022430419921875, "kernel": 0.095703125}, "q_proj": {"bias": 0.007080078125, "kernel": 0.1240234375}, "v_proj": {"bias": 0.00616455078125, "kernel": 0.109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00537109375, "kernel": 0.095703125}, "output_dense": {"bias": 0.00213623046875, "kernel": 0.058349609375}}, "final_layer_norm": {"bias": 0.0137939453125, "scale": 0.0115966796875}, "layer_norm": {"bias": 0.025634765625, "scale": 0.03955078125}}, "35": {"attention": {"k_proj": {"bias": 0.00015735626220703125, "kernel": 0.10595703125}, "out_proj": {"bias": 0.001953125, "kernel": 0.09375}, "q_proj": {"bias": 0.00634765625, "kernel": 0.109375}, "v_proj": {"bias": 0.005035400390625, "kernel": 0.09375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00439453125, "kernel": 0.07666015625}, "output_dense": {"bias": 0.0019073486328125, "kernel": 0.048583984375}}, "final_layer_norm": {"bias": 0.010986328125, "scale": 0.01104736328125}, "layer_norm": {"bias": 0.026123046875, "scale": 0.034423828125}}, "36": {"attention": {"k_proj": {"bias": 0.0001163482666015625, "kernel": 0.0986328125}, "out_proj": {"bias": 0.001800537109375, "kernel": 0.0751953125}, "q_proj": {"bias": 0.00604248046875, "kernel": 0.099609375}, "v_proj": {"bias": 0.00433349609375, "kernel": 0.0751953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00390625, "kernel": 0.06640625}, "output_dense": {"bias": 0.00174713134765625, "kernel": 0.041015625}}, "final_layer_norm": {"bias": 0.00958251953125, "scale": 0.0096435546875}, "layer_norm": {"bias": 0.0218505859375, "scale": 0.02392578125}}, "37": {"attention": {"k_proj": {"bias": 0.00011968612670898438, "kernel": 0.0732421875}, "out_proj": {"bias": 0.001678466796875, "kernel": 0.064453125}, "q_proj": {"bias": 0.00469970703125, "kernel": 0.0771484375}, "v_proj": {"bias": 0.00408935546875, "kernel": 0.0712890625}}, "feed_forward": {"intermediate_dense": {"bias": 0.003692626953125, "kernel": 0.0634765625}, "output_dense": {"bias": 0.001617431640625, "kernel": 0.03759765625}}, "final_layer_norm": {"bias": 0.009521484375, "scale": 0.008056640625}, "layer_norm": {"bias": 0.01904296875, "scale": 0.0257568359375}}, "38": {"attention": {"k_proj": {"bias": 0.0001049041748046875, "kernel": 0.1171875}, "out_proj": {"bias": 0.0015411376953125, "kernel": 0.0556640625}, "q_proj": {"bias": 0.00653076171875, "kernel": 0.11376953125}, "v_proj": {"bias": 0.0036773681640625, "kernel": 0.0654296875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0032958984375, "kernel": 0.05810546875}, "output_dense": {"bias": 0.00146484375, "kernel": 0.03369140625}}, "final_layer_norm": {"bias": 0.009033203125, "scale": 0.010009765625}, "layer_norm": {"bias": 0.020751953125, "scale": 0.0203857421875}}, "39": {"attention": {"k_proj": {"bias": 9.584426879882812e-05, "kernel": 0.107421875}, "out_proj": {"bias": 0.0013427734375, "kernel": 0.052734375}, "q_proj": {"bias": 0.0059814453125, "kernel": 0.10546875}, "v_proj": {"bias": 0.003509521484375, "kernel": 0.06396484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00299072265625, "kernel": 0.05322265625}, "output_dense": {"bias": 0.00128173828125, "kernel": 0.0308837890625}}, "final_layer_norm": {"bias": 0.0086669921875, "scale": 0.0079345703125}, "layer_norm": {"bias": 0.020751953125, "scale": 0.021240234375}}, "4": {"attention": {"k_proj": {"bias": 0.00015735626220703125, "kernel": 0.064453125}, "out_proj": {"bias": 0.00616455078125, "kernel": 0.1865234375}, "q_proj": {"bias": 0.00537109375, "kernel": 0.06396484375}, "v_proj": {"bias": 0.01153564453125, "kernel": 0.146484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.01190185546875, "kernel": 0.1669921875}, "output_dense": {"bias": 0.00567626953125, "kernel": 0.123046875}}, "final_layer_norm": {"bias": 0.02490234375, "scale": 0.021728515625}, "layer_norm": {"bias": 0.0302734375, "scale": 0.03466796875}}, "40": {"attention": {"k_proj": {"bias": 9.5367431640625e-05, "kernel": 0.08056640625}, "out_proj": {"bias": 0.001220703125, "kernel": 0.0439453125}, "q_proj": {"bias": 0.005157470703125, "kernel": 0.0869140625}, "v_proj": {"bias": 0.00274658203125, "kernel": 0.04638671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.0025787353515625, "kernel": 0.04443359375}, "output_dense": {"bias": 0.00115966796875, "kernel": 0.025390625}}, "final_layer_norm": {"bias": 0.00726318359375, "scale": 0.0067138671875}, "layer_norm": {"bias": 0.016357421875, "scale": 0.0185546875}}, "41": {"attention": {"k_proj": {"bias": 0.00010967254638671875, "kernel": 0.091796875}, "out_proj": {"bias": 0.00107574462890625, "kernel": 0.04736328125}, "q_proj": {"bias": 0.00457763671875, "kernel": 0.08984375}, "v_proj": {"bias": 0.003509521484375, "kernel": 0.07080078125}}, "feed_forward": {"intermediate_dense": {"bias": 0.002227783203125, "kernel": 0.03955078125}, "output_dense": {"bias": 0.0010223388671875, "kernel": 0.021728515625}}, "final_layer_norm": {"bias": 0.0068359375, "scale": 0.006622314453125}, "layer_norm": {"bias": 0.01953125, "scale": 0.0272216796875}}, "42": {"attention": {"k_proj": {"bias": 6.532669067382812e-05, "kernel": 0.037109375}, "out_proj": {"bias": 0.00099945068359375, "kernel": 0.034423828125}, "q_proj": {"bias": 0.00262451171875, "kernel": 0.044921875}, "v_proj": {"bias": 0.0023193359375, "kernel": 0.038330078125}}, "feed_forward": {"intermediate_dense": {"bias": 0.002166748046875, "kernel": 0.039306640625}, "output_dense": {"bias": 0.0009307861328125, "kernel": 0.01953125}}, "final_layer_norm": {"bias": 0.006927490234375, "scale": 0.0093994140625}, "layer_norm": {"bias": 0.01123046875, "scale": 0.018310546875}}, "43": {"attention": {"k_proj": {"bias": 2.0742416381835938e-05, "kernel": 0.0146484375}, "out_proj": {"bias": 0.00093841552734375, "kernel": 0.0196533203125}, "q_proj": {"bias": 0.0013427734375, "kernel": 0.01953125}, "v_proj": {"bias": 0.00177001953125, "kernel": 0.0234375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0020904541015625, "kernel": 0.03955078125}, "output_dense": {"bias": 0.00086212158203125, "kernel": 0.01806640625}}, "final_layer_norm": {"bias": 0.00677490234375, "scale": 0.0115966796875}, "layer_norm": {"bias": 0.0064697265625, "scale": 0.015380859375}}, "44": {"attention": {"k_proj": {"bias": 1.8835067749023438e-05, "kernel": 0.01220703125}, "out_proj": {"bias": 0.0008697509765625, "kernel": 0.01708984375}, "q_proj": {"bias": 0.00124359130859375, "kernel": 0.017822265625}, "v_proj": {"bias": 0.00162506103515625, "kernel": 0.020751953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.002288818359375, "kernel": 0.046142578125}, "output_dense": {"bias": 0.000774383544921875, "kernel": 0.018310546875}}, "final_layer_norm": {"bias": 0.00885009765625, "scale": 0.0150146484375}, "layer_norm": {"bias": 0.007598876953125, "scale": 0.009033203125}}, "45": {"attention": {"k_proj": {"bias": 1.3470649719238281e-05, "kernel": 0.011474609375}, "out_proj": {"bias": 0.00078582763671875, "kernel": 0.01519775390625}, "q_proj": {"bias": 0.00104522705078125, "kernel": 0.0140380859375}, "v_proj": {"bias": 0.00152587890625, "kernel": 0.0177001953125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0023956298828125, "kernel": 0.046630859375}, "output_dense": {"bias": 0.00064849853515625, "kernel": 0.017822265625}}, "final_layer_norm": {"bias": 0.010986328125, "scale": 0.023681640625}, "layer_norm": {"bias": 0.00579833984375, "scale": 0.014404296875}}, "46": {"attention": {"k_proj": {"bias": 1.7523765563964844e-05, "kernel": 0.006317138671875}, "out_proj": {"bias": 0.000659942626953125, "kernel": 0.01611328125}, "q_proj": {"bias": 0.000759124755859375, "kernel": 0.0096435546875}, "v_proj": {"bias": 0.001373291015625, "kernel": 0.0157470703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0020751953125, "kernel": 0.0361328125}, "output_dense": {"bias": 0.000553131103515625, "kernel": 0.0225830078125}}, "final_layer_norm": {"bias": 0.011962890625, "scale": 0.020263671875}, "layer_norm": {"bias": 0.00616455078125, "scale": 0.010009765625}}, "47": {"attention": {"k_proj": {"bias": 9.202957153320312e-05, "kernel": 0.0074462890625}, "out_proj": {"bias": 0.000514984130859375, "kernel": 0.03955078125}, "q_proj": {"bias": 0.0019989013671875, "kernel": 0.025146484375}, "v_proj": {"bias": 0.00092315673828125, "kernel": 0.013671875}}, "feed_forward": {"intermediate_dense": {"bias": 0.00067138671875, "kernel": 0.0133056640625}, "output_dense": {"bias": 0.0005035400390625, "kernel": 0.10791015625}}, "final_layer_norm": {"bias": 0.004150390625, "scale": 0.0062255859375}, "layer_norm": {"bias": 0.01611328125, "scale": 0.026123046875}}, "5": {"attention": {"k_proj": {"bias": 3.62396240234375e-05, "kernel": 0.05029296875}, "out_proj": {"bias": 0.00628662109375, "kernel": 0.0810546875}, "q_proj": {"bias": 0.00433349609375, "kernel": 0.05029296875}, "v_proj": {"bias": 0.01251220703125, "kernel": 0.076171875}}, "feed_forward": {"intermediate_dense": {"bias": 0.012451171875, "kernel": 0.1640625}, "output_dense": {"bias": 0.005889892578125, "kernel": 0.12255859375}}, "final_layer_norm": {"bias": 0.0279541015625, "scale": 0.022216796875}, "layer_norm": {"bias": 0.0361328125, "scale": 0.03857421875}}, "6": {"attention": {"k_proj": {"bias": 0.00012063980102539062, "kernel": 0.09375}, "out_proj": {"bias": 0.005828857421875, "kernel": 0.177734375}, "q_proj": {"bias": 0.00762939453125, "kernel": 0.08642578125}, "v_proj": {"bias": 0.0126953125, "kernel": 0.154296875}}, "feed_forward": {"intermediate_dense": {"bias": 0.011962890625, "kernel": 0.166015625}, "output_dense": {"bias": 0.00537109375, "kernel": 0.11865234375}}, "final_layer_norm": {"bias": 0.026123046875, "scale": 0.0244140625}, "layer_norm": {"bias": 0.0361328125, "scale": 0.054443359375}}, "7": {"attention": {"k_proj": {"bias": 0.00015163421630859375, "kernel": 0.1181640625}, "out_proj": {"bias": 0.005279541015625, "kernel": 0.185546875}, "q_proj": {"bias": 0.0087890625, "kernel": 0.1015625}, "v_proj": {"bias": 0.0108642578125, "kernel": 0.15234375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0108642578125, "kernel": 0.16015625}, "output_dense": {"bias": 0.00494384765625, "kernel": 0.1171875}}, "final_layer_norm": {"bias": 0.0244140625, "scale": 0.0225830078125}, "layer_norm": {"bias": 0.036376953125, "scale": 0.05078125}}, "8": {"attention": {"k_proj": {"bias": 0.00010967254638671875, "kernel": 0.080078125}, "out_proj": {"bias": 0.00506591796875, "kernel": 0.142578125}, "q_proj": {"bias": 0.006103515625, "kernel": 0.0751953125}, "v_proj": {"bias": 0.01031494140625, "kernel": 0.12451171875}}, "feed_forward": {"intermediate_dense": {"bias": 0.01068115234375, "kernel": 0.15625}, "output_dense": {"bias": 0.00482177734375, "kernel": 0.1103515625}}, "final_layer_norm": {"bias": 0.024658203125, "scale": 0.0299072265625}, "layer_norm": {"bias": 0.030029296875, "scale": 0.038330078125}}, "9": {"attention": {"k_proj": {"bias": 0.00016021728515625, "kernel": 0.11279296875}, "out_proj": {"bias": 0.004638671875, "kernel": 0.2109375}, "q_proj": {"bias": 0.00811767578125, "kernel": 0.1103515625}, "v_proj": {"bias": 0.00958251953125, "kernel": 0.1884765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0087890625, "kernel": 0.13671875}, "output_dense": {"bias": 0.0045166015625, "kernel": 0.1015625}}, "final_layer_norm": {"bias": 0.018310546875, "scale": 0.02294921875}, "layer_norm": {"bias": 0.030029296875, "scale": 0.040283203125}}}, "pos_conv_embed": {"conv": {"bias": 0.01806640625, "weight_g": 0.05908203125, "weight_v": 0.212890625}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.08056640625, "scale": 0.140625}, "projection": {"bias": 0.0277099609375, "kernel": 0.578125}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"lm_head": {"bias": 0.5209351181983948, "kernel": 37.544525146484375}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 40.673057556152344, "scale": 69.13858032226562}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.8113077282905579, "kernel": 32.96989440917969}, "out_proj": {"bias": 1.9174251556396484, "kernel": 29.265724182128906}, "q_proj": {"bias": 2.1440391540527344, "kernel": 32.71912384033203}, "v_proj": {"bias": 0.550544261932373, "kernel": 29.496604919433594}}, "feed_forward": {"intermediate_dense": {"bias": 2.644195556640625, "kernel": 115.65182495117188}, "output_dense": {"bias": 1.2946953773498535, "kernel": 111.85960388183594}}, "final_layer_norm": {"bias": 1.646620750427246, "scale": 19.787660598754883}, "layer_norm": {"bias": 2.6475274562835693, "scale": 13.635727882385254}}, "1": {"attention": {"k_proj": {"bias": 0.9440380930900574, "kernel": 47.31852340698242}, "out_proj": {"bias": 1.4071073532104492, "kernel": 48.500152587890625}, "q_proj": {"bias": 3.466937303543091, "kernel": 47.20008087158203}, "v_proj": {"bias": 0.39827921986579895, "kernel": 46.49374771118164}}, "feed_forward": {"intermediate_dense": {"bias": 2.555637836456299, "kernel": 108.72433471679688}, "output_dense": {"bias": 0.940125584602356, "kernel": 95.33328247070312}}, "final_layer_norm": {"bias": 1.4362114667892456, "scale": 19.190898895263672}, "layer_norm": {"bias": 1.7816333770751953, "scale": 15.636941909790039}}, "10": {"attention": {"k_proj": {"bias": 1.0017592906951904, "kernel": 55.2117919921875}, "out_proj": {"bias": 1.3662875890731812, "kernel": 56.87333297729492}, "q_proj": {"bias": 2.73098087310791, "kernel": 55.304718017578125}, "v_proj": {"bias": 0.43543821573257446, "kernel": 56.372047424316406}}, "feed_forward": {"intermediate_dense": {"bias": 2.429971694946289, "kernel": 111.598876953125}, "output_dense": {"bias": 0.696595311164856, "kernel": 103.65608215332031}}, "final_layer_norm": {"bias": 2.2242610454559326, "scale": 21.256866455078125}, "layer_norm": {"bias": 1.7229793071746826, "scale": 19.618568420410156}}, "11": {"attention": {"k_proj": {"bias": 1.1066579818725586, "kernel": 56.49891662597656}, "out_proj": {"bias": 1.2154837846755981, "kernel": 57.795780181884766}, "q_proj": {"bias": 2.68850040435791, "kernel": 56.589378356933594}, "v_proj": {"bias": 0.5430388450622559, "kernel": 57.24900436401367}}, "feed_forward": {"intermediate_dense": {"bias": 2.4684741497039795, "kernel": 112.54217529296875}, "output_dense": {"bias": 0.6669754981994629, "kernel": 105.25094604492188}}, "final_layer_norm": {"bias": 2.236138343811035, "scale": 21.454490661621094}, "layer_norm": {"bias": 1.7860324382781982, "scale": 20.937978744506836}}, "12": {"attention": {"k_proj": {"bias": 1.0459033250808716, "kernel": 56.80964660644531}, "out_proj": {"bias": 1.183690071105957, "kernel": 57.24894714355469}, "q_proj": {"bias": 2.539783477783203, "kernel": 56.91714096069336}, "v_proj": {"bias": 0.5111549496650696, "kernel": 56.852561950683594}}, "feed_forward": {"intermediate_dense": {"bias": 2.504671096801758, "kernel": 113.58503723144531}, "output_dense": {"bias": 0.6515496969223022, "kernel": 107.18756866455078}}, "final_layer_norm": {"bias": 2.127401351928711, "scale": 21.47256851196289}, "layer_norm": {"bias": 1.828575849533081, "scale": 20.734970092773438}}, "13": {"attention": {"k_proj": {"bias": 1.0867606401443481, "kernel": 58.8643798828125}, "out_proj": {"bias": 1.2046430110931396, "kernel": 57.566001892089844}, "q_proj": {"bias": 2.4900307655334473, "kernel": 58.98124694824219}, "v_proj": {"bias": 0.49399328231811523, "kernel": 57.03108215332031}}, "feed_forward": {"intermediate_dense": {"bias": 2.5295088291168213, "kernel": 114.78202056884766}, "output_dense": {"bias": 0.6861958503723145, "kernel": 108.24507904052734}}, "final_layer_norm": {"bias": 2.0789852142333984, "scale": 21.603464126586914}, "layer_norm": {"bias": 2.0034894943237305, "scale": 22.032760620117188}}, "14": {"attention": {"k_proj": {"bias": 1.0542607307434082, "kernel": 58.417381286621094}, "out_proj": {"bias": 1.3672080039978027, "kernel": 55.00845718383789}, "q_proj": {"bias": 2.609252452850342, "kernel": 58.61229705810547}, "v_proj": {"bias": 0.458562433719635, "kernel": 53.856224060058594}}, "feed_forward": {"intermediate_dense": {"bias": 2.574413299560547, "kernel": 115.60798645019531}, "output_dense": {"bias": 0.7414776086807251, "kernel": 110.05062866210938}}, "final_layer_norm": {"bias": 2.15677809715271, "scale": 21.850406646728516}, "layer_norm": {"bias": 2.0840797424316406, "scale": 21.36309051513672}}, "15": {"attention": {"k_proj": {"bias": 1.1064494848251343, "kernel": 59.414955139160156}, "out_proj": {"bias": 1.494602084159851, "kernel": 57.69557189941406}, "q_proj": {"bias": 2.640852928161621, "kernel": 59.76347351074219}, "v_proj": {"bias": 0.5933749675750732, "kernel": 56.775291442871094}}, "feed_forward": {"intermediate_dense": {"bias": 2.612638473510742, "kernel": 115.66761779785156}, "output_dense": {"bias": 0.9184398651123047, "kernel": 111.55628204345703}}, "final_layer_norm": {"bias": 2.325559616088867, "scale": 21.665550231933594}, "layer_norm": {"bias": 2.3960001468658447, "scale": 23.560142517089844}}, "16": {"attention": {"k_proj": {"bias": 1.0375196933746338, "kernel": 58.81951141357422}, "out_proj": {"bias": 1.4001483917236328, "kernel": 56.30195236206055}, "q_proj": {"bias": 2.7167716026306152, "kernel": 58.95283889770508}, "v_proj": {"bias": 0.47968268394470215, "kernel": 55.15779495239258}}, "feed_forward": {"intermediate_dense": {"bias": 2.584649085998535, "kernel": 115.93186950683594}, "output_dense": {"bias": 0.9454188346862793, "kernel": 111.9276351928711}}, "final_layer_norm": {"bias": 2.357800006866455, "scale": 22.069507598876953}, "layer_norm": {"bias": 2.2420315742492676, "scale": 21.493070602416992}}, "17": {"attention": {"k_proj": {"bias": 0.9661256074905396, "kernel": 58.111656188964844}, "out_proj": {"bias": 1.331639289855957, "kernel": 54.94080352783203}, "q_proj": {"bias": 2.77952241897583, "kernel": 58.29995346069336}, "v_proj": {"bias": 0.5076445937156677, "kernel": 53.84953689575195}}, "feed_forward": {"intermediate_dense": {"bias": 2.571765899658203, "kernel": 116.83853912353516}, "output_dense": {"bias": 0.9688688516616821, "kernel": 112.32685852050781}}, "final_layer_norm": {"bias": 2.3481884002685547, "scale": 22.518054962158203}, "layer_norm": {"bias": 2.1583070755004883, "scale": 19.719860076904297}}, "18": {"attention": {"k_proj": {"bias": 1.0948385000228882, "kernel": 60.64401626586914}, "out_proj": {"bias": 1.461631417274475, "kernel": 57.33832931518555}, "q_proj": {"bias": 2.702681541442871, "kernel": 61.00336456298828}, "v_proj": {"bias": 0.569412887096405, "kernel": 56.042701721191406}}, "feed_forward": {"intermediate_dense": {"bias": 2.598742961883545, "kernel": 116.98600769042969}, "output_dense": {"bias": 1.1105923652648926, "kernel": 113.83198547363281}}, "final_layer_norm": {"bias": 2.5398643016815186, "scale": 22.73513412475586}, "layer_norm": {"bias": 2.394752025604248, "scale": 22.82726287841797}}, "19": {"attention": {"k_proj": {"bias": 1.0032567977905273, "kernel": 58.73444366455078}, "out_proj": {"bias": 1.471828579902649, "kernel": 55.9002685546875}, "q_proj": {"bias": 2.87642765045166, "kernel": 58.99829864501953}, "v_proj": {"bias": 0.5279619097709656, "kernel": 54.46978759765625}}, "feed_forward": {"intermediate_dense": {"bias": 2.6447794437408447, "kernel": 117.34336853027344}, "output_dense": {"bias": 1.2023404836654663, "kernel": 114.66693115234375}}, "final_layer_norm": {"bias": 2.5727314949035645, "scale": 23.086666107177734}, "layer_norm": {"bias": 2.264643669128418, "scale": 21.297958374023438}}, "2": {"attention": {"k_proj": {"bias": 1.0275315046310425, "kernel": 54.45257568359375}, "out_proj": {"bias": 1.274333119392395, "kernel": 51.26393127441406}, "q_proj": {"bias": 3.554164171218872, "kernel": 54.26152038574219}, "v_proj": {"bias": 0.3945860266685486, "kernel": 50.298126220703125}}, "feed_forward": {"intermediate_dense": {"bias": 2.5680737495422363, "kernel": 113.23712158203125}, "output_dense": {"bias": 0.8139544129371643, "kernel": 98.94503784179688}}, "final_layer_norm": {"bias": 1.4587430953979492, "scale": 21.82147216796875}, "layer_norm": {"bias": 1.594740390777588, "scale": 18.603172302246094}}, "20": {"attention": {"k_proj": {"bias": 0.9424487352371216, "kernel": 57.74285888671875}, "out_proj": {"bias": 1.5207984447479248, "kernel": 54.431156158447266}, "q_proj": {"bias": 2.830753803253174, "kernel": 57.977779388427734}, "v_proj": {"bias": 0.5010366439819336, "kernel": 52.746070861816406}}, "feed_forward": {"intermediate_dense": {"bias": 2.6132960319519043, "kernel": 117.87947082519531}, "output_dense": {"bias": 1.3180112838745117, "kernel": 115.27236938476562}}, "final_layer_norm": {"bias": 2.5551445484161377, "scale": 23.932769775390625}, "layer_norm": {"bias": 2.189181327819824, "scale": 20.310253143310547}}, "21": {"attention": {"k_proj": {"bias": 1.0507590770721436, "kernel": 59.16876983642578}, "out_proj": {"bias": 1.5862836837768555, "kernel": 55.10758590698242}, "q_proj": {"bias": 2.768588066101074, "kernel": 59.5523796081543}, "v_proj": {"bias": 0.6221956014633179, "kernel": 53.624267578125}}, "feed_forward": {"intermediate_dense": {"bias": 2.6529269218444824, "kernel": 118.28579711914062}, "output_dense": {"bias": 1.466293454170227, "kernel": 115.6703872680664}}, "final_layer_norm": {"bias": 2.5929551124572754, "scale": 23.78704261779785}, "layer_norm": {"bias": 2.2577672004699707, "scale": 20.861156463623047}}, "22": {"attention": {"k_proj": {"bias": 1.090219259262085, "kernel": 60.29521179199219}, "out_proj": {"bias": 1.5551979541778564, "kernel": 55.578346252441406}, "q_proj": {"bias": 2.7778778076171875, "kernel": 60.52397918701172}, "v_proj": {"bias": 0.610236644744873, "kernel": 54.83330535888672}}, "feed_forward": {"intermediate_dense": {"bias": 2.558276414871216, "kernel": 118.28363037109375}, "output_dense": {"bias": 1.5429821014404297, "kernel": 115.05326843261719}}, "final_layer_norm": {"bias": 2.4441022872924805, "scale": 23.302928924560547}, "layer_norm": {"bias": 2.268827438354492, "scale": 19.92362403869629}}, "23": {"attention": {"k_proj": {"bias": 1.1548974514007568, "kernel": 63.166141510009766}, "out_proj": {"bias": 1.7926909923553467, "kernel": 58.138999938964844}, "q_proj": {"bias": 2.7788705825805664, "kernel": 63.41739273071289}, "v_proj": {"bias": 0.7552490830421448, "kernel": 58.14105224609375}}, "feed_forward": {"intermediate_dense": {"bias": 2.524137496948242, "kernel": 118.8178482055664}, "output_dense": {"bias": 1.5165748596191406, "kernel": 116.68386840820312}}, "final_layer_norm": {"bias": 2.8677263259887695, "scale": 23.711963653564453}, "layer_norm": {"bias": 2.737786293029785, "scale": 23.203153610229492}}, "24": {"attention": {"k_proj": {"bias": 1.2170816659927368, "kernel": 62.985740661621094}, "out_proj": {"bias": 1.861964464187622, "kernel": 60.554176330566406}, "q_proj": {"bias": 2.946572780609131, "kernel": 62.97161102294922}, "v_proj": {"bias": 0.7363754510879517, "kernel": 59.941978454589844}}, "feed_forward": {"intermediate_dense": {"bias": 2.62442684173584, "kernel": 118.09503173828125}, "output_dense": {"bias": 1.5554132461547852, "kernel": 119.34771728515625}}, "final_layer_norm": {"bias": 2.8768773078918457, "scale": 23.78775405883789}, "layer_norm": {"bias": 2.5165457725524902, "scale": 22.2713680267334}}, "25": {"attention": {"k_proj": {"bias": 1.1492834091186523, "kernel": 62.31687927246094}, "out_proj": {"bias": 1.5715079307556152, "kernel": 57.193145751953125}, "q_proj": {"bias": 2.958195686340332, "kernel": 62.32655715942383}, "v_proj": {"bias": 0.709597110748291, "kernel": 57.22730255126953}}, "feed_forward": {"intermediate_dense": {"bias": 2.528472423553467, "kernel": 118.43785858154297}, "output_dense": {"bias": 1.291271686553955, "kernel": 119.30908966064453}}, "final_layer_norm": {"bias": 2.6912477016448975, "scale": 24.2921142578125}, "layer_norm": {"bias": 2.49531888961792, "scale": 21.015914916992188}}, "26": {"attention": {"k_proj": {"bias": 1.1055383682250977, "kernel": 61.289649963378906}, "out_proj": {"bias": 1.4457355737686157, "kernel": 56.96365737915039}, "q_proj": {"bias": 3.056081533432007, "kernel": 61.178016662597656}, "v_proj": {"bias": 0.5886263251304626, "kernel": 57.36988067626953}}, "feed_forward": {"intermediate_dense": {"bias": 2.622715473175049, "kernel": 117.62094116210938}, "output_dense": {"bias": 1.2430529594421387, "kernel": 115.52780151367188}}, "final_layer_norm": {"bias": 2.310211181640625, "scale": 23.112323760986328}, "layer_norm": {"bias": 2.384587526321411, "scale": 21.02827262878418}}, "27": {"attention": {"k_proj": {"bias": 1.2962713241577148, "kernel": 63.69104766845703}, "out_proj": {"bias": 1.666663408279419, "kernel": 60.368343353271484}, "q_proj": {"bias": 2.90488862991333, "kernel": 63.8581428527832}, "v_proj": {"bias": 0.7426369786262512, "kernel": 60.566261291503906}}, "feed_forward": {"intermediate_dense": {"bias": 2.752497434616089, "kernel": 116.50486755371094}, "output_dense": {"bias": 1.0676369667053223, "kernel": 115.45014953613281}}, "final_layer_norm": {"bias": 2.5837950706481934, "scale": 22.617046356201172}, "layer_norm": {"bias": 2.5489883422851562, "scale": 23.00310516357422}}, "28": {"attention": {"k_proj": {"bias": 1.2439610958099365, "kernel": 63.82756805419922}, "out_proj": {"bias": 1.6906780004501343, "kernel": 59.75843811035156}, "q_proj": {"bias": 3.1427388191223145, "kernel": 63.66680908203125}, "v_proj": {"bias": 0.6593037843704224, "kernel": 60.21324920654297}}, "feed_forward": {"intermediate_dense": {"bias": 2.7267439365386963, "kernel": 116.87728118896484}, "output_dense": {"bias": 0.8951385021209717, "kernel": 117.35613250732422}}, "final_layer_norm": {"bias": 2.5781450271606445, "scale": 23.051219940185547}, "layer_norm": {"bias": 2.153111457824707, "scale": 23.46197509765625}}, "29": {"attention": {"k_proj": {"bias": 1.2186551094055176, "kernel": 62.70256042480469}, "out_proj": {"bias": 1.6350669860839844, "kernel": 62.73187255859375}, "q_proj": {"bias": 3.0973498821258545, "kernel": 62.65479278564453}, "v_proj": {"bias": 0.610160231590271, "kernel": 62.84513473510742}}, "feed_forward": {"intermediate_dense": {"bias": 2.744981527328491, "kernel": 118.20719909667969}, "output_dense": {"bias": 1.119023084640503, "kernel": 122.39694213867188}}, "final_layer_norm": {"bias": 2.7925782203674316, "scale": 24.41854476928711}, "layer_norm": {"bias": 2.2828850746154785, "scale": 24.434240341186523}}, "3": {"attention": {"k_proj": {"bias": 1.045365333557129, "kernel": 58.58887481689453}, "out_proj": {"bias": 1.4475125074386597, "kernel": 53.75390625}, "q_proj": {"bias": 3.09413480758667, "kernel": 58.85149383544922}, "v_proj": {"bias": 0.3790353536605835, "kernel": 53.268436431884766}}, "feed_forward": {"intermediate_dense": {"bias": 2.529479503631592, "kernel": 114.88238525390625}, "output_dense": {"bias": 0.7774725556373596, "kernel": 102.1509780883789}}, "final_layer_norm": {"bias": 1.6940510272979736, "scale": 22.27197265625}, "layer_norm": {"bias": 1.81778883934021, "scale": 21.501422882080078}}, "30": {"attention": {"k_proj": {"bias": 1.2389600276947021, "kernel": 63.414527893066406}, "out_proj": {"bias": 1.4616260528564453, "kernel": 59.048439025878906}, "q_proj": {"bias": 3.1629326343536377, "kernel": 63.59883117675781}, "v_proj": {"bias": 0.6426811218261719, "kernel": 59.6822395324707}}, "feed_forward": {"intermediate_dense": {"bias": 2.716099739074707, "kernel": 118.70901489257812}, "output_dense": {"bias": 1.0879268646240234, "kernel": 122.34607696533203}}, "final_layer_norm": {"bias": 2.716019868850708, "scale": 25.517379760742188}, "layer_norm": {"bias": 2.2954940795898438, "scale": 24.266014099121094}}, "31": {"attention": {"k_proj": {"bias": 1.2554137706756592, "kernel": 61.94691467285156}, "out_proj": {"bias": 1.3945598602294922, "kernel": 58.343170166015625}, "q_proj": {"bias": 2.928905487060547, "kernel": 62.19603729248047}, "v_proj": {"bias": 0.6489136815071106, "kernel": 58.896881103515625}}, "feed_forward": {"intermediate_dense": {"bias": 2.7946105003356934, "kernel": 117.29702758789062}, "output_dense": {"bias": 1.2543851137161255, "kernel": 119.12557983398438}}, "final_layer_norm": {"bias": 2.5259289741516113, "scale": 25.29692840576172}, "layer_norm": {"bias": 2.29666805267334, "scale": 23.876663208007812}}, "32": {"attention": {"k_proj": {"bias": 1.2868143320083618, "kernel": 63.18997573852539}, "out_proj": {"bias": 1.3791754245758057, "kernel": 58.52682113647461}, "q_proj": {"bias": 3.0975964069366455, "kernel": 63.27565383911133}, "v_proj": {"bias": 0.601915717124939, "kernel": 59.497825622558594}}, "feed_forward": {"intermediate_dense": {"bias": 2.7597403526306152, "kernel": 116.60746002197266}, "output_dense": {"bias": 1.3182942867279053, "kernel": 118.97764587402344}}, "final_layer_norm": {"bias": 2.5389628410339355, "scale": 25.583377838134766}, "layer_norm": {"bias": 2.4226255416870117, "scale": 23.973602294921875}}, "33": {"attention": {"k_proj": {"bias": 1.320791482925415, "kernel": 62.965518951416016}, "out_proj": {"bias": 1.4235005378723145, "kernel": 58.20710754394531}, "q_proj": {"bias": 3.219902992248535, "kernel": 63.21660614013672}, "v_proj": {"bias": 0.6391017436981201, "kernel": 59.16463088989258}}, "feed_forward": {"intermediate_dense": {"bias": 2.768444299697876, "kernel": 115.30213928222656}, "output_dense": {"bias": 1.3450939655303955, "kernel": 116.90930938720703}}, "final_layer_norm": {"bias": 2.443160057067871, "scale": 25.242919921875}, "layer_norm": {"bias": 2.4992477893829346, "scale": 24.030704498291016}}, "34": {"attention": {"k_proj": {"bias": 1.2810293436050415, "kernel": 61.9166259765625}, "out_proj": {"bias": 1.67946195602417, "kernel": 58.462928771972656}, "q_proj": {"bias": 3.1653943061828613, "kernel": 62.24740982055664}, "v_proj": {"bias": 0.5884761810302734, "kernel": 59.42552185058594}}, "feed_forward": {"intermediate_dense": {"bias": 2.8799657821655273, "kernel": 114.03166198730469}, "output_dense": {"bias": 1.2904592752456665, "kernel": 115.12754821777344}}, "final_layer_norm": {"bias": 2.3931610584259033, "scale": 24.269775390625}, "layer_norm": {"bias": 2.5567469596862793, "scale": 24.71498680114746}}, "35": {"attention": {"k_proj": {"bias": 1.412321925163269, "kernel": 66.12313842773438}, "out_proj": {"bias": 1.5879793167114258, "kernel": 57.30572509765625}, "q_proj": {"bias": 2.8138043880462646, "kernel": 66.92033386230469}, "v_proj": {"bias": 0.5868934392929077, "kernel": 58.715667724609375}}, "feed_forward": {"intermediate_dense": {"bias": 2.996635675430298, "kernel": 112.12554931640625}, "output_dense": {"bias": 1.1457792520523071, "kernel": 112.55303955078125}}, "final_layer_norm": {"bias": 2.4263648986816406, "scale": 23.947580337524414}, "layer_norm": {"bias": 2.5475754737854004, "scale": 25.059457778930664}}, "36": {"attention": {"k_proj": {"bias": 1.391077995300293, "kernel": 62.801483154296875}, "out_proj": {"bias": 1.5890069007873535, "kernel": 57.00489044189453}, "q_proj": {"bias": 2.8097341060638428, "kernel": 63.267112731933594}, "v_proj": {"bias": 0.4997527599334717, "kernel": 58.747901916503906}}, "feed_forward": {"intermediate_dense": {"bias": 2.8604254722595215, "kernel": 110.12071990966797}, "output_dense": {"bias": 1.111302375793457, "kernel": 111.17442321777344}}, "final_layer_norm": {"bias": 2.3463120460510254, "scale": 24.294052124023438}, "layer_norm": {"bias": 2.4367589950561523, "scale": 24.563945770263672}}, "37": {"attention": {"k_proj": {"bias": 1.377180814743042, "kernel": 60.198909759521484}, "out_proj": {"bias": 1.8268768787384033, "kernel": 55.82686996459961}, "q_proj": {"bias": 2.588944673538208, "kernel": 60.45283508300781}, "v_proj": {"bias": 0.4627252519130707, "kernel": 57.156349182128906}}, "feed_forward": {"intermediate_dense": {"bias": 2.756605625152588, "kernel": 108.66197967529297}, "output_dense": {"bias": 1.1040644645690918, "kernel": 110.17047119140625}}, "final_layer_norm": {"bias": 2.1047685146331787, "scale": 24.651269912719727}, "layer_norm": {"bias": 2.4145102500915527, "scale": 24.709896087646484}}, "38": {"attention": {"k_proj": {"bias": 1.3716527223587036, "kernel": 58.29988098144531}, "out_proj": {"bias": 1.5120515823364258, "kernel": 54.76201629638672}, "q_proj": {"bias": 2.4799768924713135, "kernel": 58.57093811035156}, "v_proj": {"bias": 0.4617878794670105, "kernel": 55.92902374267578}}, "feed_forward": {"intermediate_dense": {"bias": 2.7166287899017334, "kernel": 106.1817626953125}, "output_dense": {"bias": 1.0576279163360596, "kernel": 107.87837219238281}}, "final_layer_norm": {"bias": 2.127455711364746, "scale": 25.347200393676758}, "layer_norm": {"bias": 2.5460262298583984, "scale": 25.71381378173828}}, "39": {"attention": {"k_proj": {"bias": 1.3350980281829834, "kernel": 56.98695755004883}, "out_proj": {"bias": 1.770888328552246, "kernel": 54.216217041015625}, "q_proj": {"bias": 2.252932548522949, "kernel": 57.44124984741211}, "v_proj": {"bias": 0.48440665006637573, "kernel": 55.59510803222656}}, "feed_forward": {"intermediate_dense": {"bias": 2.6787919998168945, "kernel": 102.95674133300781}, "output_dense": {"bias": 1.1332277059555054, "kernel": 106.65170288085938}}, "final_layer_norm": {"bias": 2.0098838806152344, "scale": 25.985471725463867}, "layer_norm": {"bias": 2.527939558029175, "scale": 26.264446258544922}}, "4": {"attention": {"k_proj": {"bias": 1.0399675369262695, "kernel": 60.86566162109375}, "out_proj": {"bias": 1.6746083498001099, "kernel": 55.53059387207031}, "q_proj": {"bias": 2.8236005306243896, "kernel": 61.19404602050781}, "v_proj": {"bias": 0.4068540036678314, "kernel": 55.1674919128418}}, "feed_forward": {"intermediate_dense": {"bias": 2.480872631072998, "kernel": 114.44273376464844}, "output_dense": {"bias": 0.9266092777252197, "kernel": 103.9228286743164}}, "final_layer_norm": {"bias": 1.8764877319335938, "scale": 21.887744903564453}, "layer_norm": {"bias": 1.9884653091430664, "scale": 22.927963256835938}}, "40": {"attention": {"k_proj": {"bias": 1.3182929754257202, "kernel": 55.02893829345703}, "out_proj": {"bias": 1.7076668739318848, "kernel": 51.667320251464844}, "q_proj": {"bias": 2.1885828971862793, "kernel": 55.7418212890625}, "v_proj": {"bias": 0.5162162780761719, "kernel": 52.312015533447266}}, "feed_forward": {"intermediate_dense": {"bias": 2.5248398780822754, "kernel": 99.9987564086914}, "output_dense": {"bias": 1.1562472581863403, "kernel": 103.56117248535156}}, "final_layer_norm": {"bias": 1.944197416305542, "scale": 25.269933700561523}, "layer_norm": {"bias": 2.4193692207336426, "scale": 25.71145248413086}}, "41": {"attention": {"k_proj": {"bias": 2.1187949180603027, "kernel": 54.60551071166992}, "out_proj": {"bias": 1.4731452465057373, "kernel": 53.70256423950195}, "q_proj": {"bias": 1.8876457214355469, "kernel": 54.81938171386719}, "v_proj": {"bias": 0.5346659421920776, "kernel": 54.117794036865234}}, "feed_forward": {"intermediate_dense": {"bias": 2.606231212615967, "kernel": 96.09676361083984}, "output_dense": {"bias": 1.1918582916259766, "kernel": 101.55107879638672}}, "final_layer_norm": {"bias": 2.287397861480713, "scale": 28.635906219482422}, "layer_norm": {"bias": 2.4100098609924316, "scale": 27.684532165527344}}, "42": {"attention": {"k_proj": {"bias": 1.4359550476074219, "kernel": 49.58379364013672}, "out_proj": {"bias": 1.5237478017807007, "kernel": 46.391700744628906}, "q_proj": {"bias": 1.7593891620635986, "kernel": 50.26066207885742}, "v_proj": {"bias": 0.7900923490524292, "kernel": 45.55311584472656}}, "feed_forward": {"intermediate_dense": {"bias": 2.500276803970337, "kernel": 95.7851333618164}, "output_dense": {"bias": 1.2357020378112793, "kernel": 100.039306640625}}, "final_layer_norm": {"bias": 2.0316760540008545, "scale": 29.865612030029297}, "layer_norm": {"bias": 1.8518010377883911, "scale": 26.180740356445312}}, "43": {"attention": {"k_proj": {"bias": 1.621580958366394, "kernel": 43.34894561767578}, "out_proj": {"bias": 1.4808337688446045, "kernel": 42.41896057128906}, "q_proj": {"bias": 1.560662031173706, "kernel": 43.89337921142578}, "v_proj": {"bias": 0.6446149349212646, "kernel": 40.75067901611328}}, "feed_forward": {"intermediate_dense": {"bias": 2.41300106048584, "kernel": 93.46316528320312}, "output_dense": {"bias": 0.9687212705612183, "kernel": 97.31781768798828}}, "final_layer_norm": {"bias": 2.1836979389190674, "scale": 32.05499267578125}, "layer_norm": {"bias": 1.9327057600021362, "scale": 24.622459411621094}}, "44": {"attention": {"k_proj": {"bias": 2.7212073802948, "kernel": 42.08189010620117}, "out_proj": {"bias": 1.2360304594039917, "kernel": 45.85190963745117}, "q_proj": {"bias": 1.5498037338256836, "kernel": 42.5997200012207}, "v_proj": {"bias": 0.4342782497406006, "kernel": 45.199851989746094}}, "feed_forward": {"intermediate_dense": {"bias": 2.3771462440490723, "kernel": 92.01663208007812}, "output_dense": {"bias": 0.8690996766090393, "kernel": 94.94656372070312}}, "final_layer_norm": {"bias": 2.274548053741455, "scale": 34.271636962890625}, "layer_norm": {"bias": 1.8190332651138306, "scale": 24.816062927246094}}, "45": {"attention": {"k_proj": {"bias": 2.2774922847747803, "kernel": 40.51180648803711}, "out_proj": {"bias": 1.1032899618148804, "kernel": 49.391014099121094}, "q_proj": {"bias": 1.5832633972167969, "kernel": 40.733741760253906}, "v_proj": {"bias": 0.4726644456386566, "kernel": 49.63447570800781}}, "feed_forward": {"intermediate_dense": {"bias": 2.3599283695220947, "kernel": 89.35115051269531}, "output_dense": {"bias": 0.9707900285720825, "kernel": 91.044921875}}, "final_layer_norm": {"bias": 1.873668909072876, "scale": 33.35442352294922}, "layer_norm": {"bias": 1.6655107736587524, "scale": 23.580810546875}}, "46": {"attention": {"k_proj": {"bias": 1.806941270828247, "kernel": 40.38396072387695}, "out_proj": {"bias": 0.8824916481971741, "kernel": 51.85765075683594}, "q_proj": {"bias": 1.6926847696304321, "kernel": 41.41037368774414}, "v_proj": {"bias": 0.4726625978946686, "kernel": 52.68905258178711}}, "feed_forward": {"intermediate_dense": {"bias": 2.261974811553955, "kernel": 85.352783203125}, "output_dense": {"bias": 1.160945177078247, "kernel": 82.72911071777344}}, "final_layer_norm": {"bias": 1.6990585327148438, "scale": 29.25632095336914}, "layer_norm": {"bias": 1.49913489818573, "scale": 22.58135986328125}}, "47": {"attention": {"k_proj": {"bias": 1.2597920894622803, "kernel": 43.66704559326172}, "out_proj": {"bias": 0.774490475654602, "kernel": 48.23713302612305}, "q_proj": {"bias": 1.856778621673584, "kernel": 45.34018325805664}, "v_proj": {"bias": 0.7235761284828186, "kernel": 48.86346435546875}}, "feed_forward": {"intermediate_dense": {"bias": 2.240572929382324, "kernel": 82.07774353027344}, "output_dense": {"bias": 0.6869561672210693, "kernel": 77.27716064453125}}, "final_layer_norm": {"bias": 1.4815858602523804, "scale": 23.925792694091797}, "layer_norm": {"bias": 1.5178518295288086, "scale": 20.47167205810547}}, "5": {"attention": {"k_proj": {"bias": 0.9897308945655823, "kernel": 56.03916931152344}, "out_proj": {"bias": 1.609462857246399, "kernel": 55.549888610839844}, "q_proj": {"bias": 3.041853904724121, "kernel": 56.076255798339844}, "v_proj": {"bias": 0.37399306893348694, "kernel": 55.713050842285156}}, "feed_forward": {"intermediate_dense": {"bias": 2.4135913848876953, "kernel": 114.26228332519531}, "output_dense": {"bias": 0.9583671689033508, "kernel": 102.99665832519531}}, "final_layer_norm": {"bias": 2.0126075744628906, "scale": 22.076251983642578}, "layer_norm": {"bias": 1.8507163524627686, "scale": 21.015605926513672}}, "6": {"attention": {"k_proj": {"bias": 1.0909814834594727, "kernel": 58.81627655029297}, "out_proj": {"bias": 1.611471176147461, "kernel": 56.600990295410156}, "q_proj": {"bias": 3.026271343231201, "kernel": 59.24803161621094}, "v_proj": {"bias": 0.4057280123233795, "kernel": 56.18861770629883}}, "feed_forward": {"intermediate_dense": {"bias": 2.381702423095703, "kernel": 113.72636413574219}, "output_dense": {"bias": 0.8466658592224121, "kernel": 103.51261901855469}}, "final_layer_norm": {"bias": 2.303737163543701, "scale": 21.543832778930664}, "layer_norm": {"bias": 1.9191420078277588, "scale": 22.41590118408203}}, "7": {"attention": {"k_proj": {"bias": 0.9629570841789246, "kernel": 57.23834991455078}, "out_proj": {"bias": 1.4620752334594727, "kernel": 56.51910400390625}, "q_proj": {"bias": 2.7594337463378906, "kernel": 57.74028396606445}, "v_proj": {"bias": 0.4560803174972534, "kernel": 55.70353317260742}}, "feed_forward": {"intermediate_dense": {"bias": 2.375901222229004, "kernel": 113.39501953125}, "output_dense": {"bias": 0.6902457475662231, "kernel": 103.27099609375}}, "final_layer_norm": {"bias": 2.27414608001709, "scale": 21.64269256591797}, "layer_norm": {"bias": 1.8735694885253906, "scale": 21.6693172454834}}, "8": {"attention": {"k_proj": {"bias": 1.0564680099487305, "kernel": 57.36131286621094}, "out_proj": {"bias": 1.2816615104675293, "kernel": 56.908939361572266}, "q_proj": {"bias": 2.755068302154541, "kernel": 57.48906707763672}, "v_proj": {"bias": 0.4299778640270233, "kernel": 56.28636169433594}}, "feed_forward": {"intermediate_dense": {"bias": 2.414928674697876, "kernel": 112.85737609863281}, "output_dense": {"bias": 0.6650994420051575, "kernel": 102.85050964355469}}, "final_layer_norm": {"bias": 2.1756978034973145, "scale": 21.386672973632812}, "layer_norm": {"bias": 1.8388488292694092, "scale": 21.417198181152344}}, "9": {"attention": {"k_proj": {"bias": 1.1448609828948975, "kernel": 59.02931594848633}, "out_proj": {"bias": 1.571290135383606, "kernel": 58.968345642089844}, "q_proj": {"bias": 2.571730613708496, "kernel": 59.39432907104492}, "v_proj": {"bias": 0.5222671627998352, "kernel": 58.37605285644531}}, "feed_forward": {"intermediate_dense": {"bias": 2.4743833541870117, "kernel": 111.38484191894531}, "output_dense": {"bias": 0.7894808053970337, "kernel": 102.71121215820312}}, "final_layer_norm": {"bias": 2.2142271995544434, "scale": 20.610157012939453}, "layer_norm": {"bias": 2.063072681427002, "scale": 23.753849029541016}}}, "pos_conv_embed": {"conv": {"bias": 6.124717712402344, "weight_g": 9.593154907226562, "weight_v": 122.927978515625}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 2.0290679931640625, "kernel": 20.55536460876465}, "layer_norm": {"bias": 4.550922393798828, "scale": 16.167570114135742}}, "1": {"conv": {"bias": 1.7790228128433228, "kernel": 51.24136734008789}, "layer_norm": {"bias": 5.962646961212158, "scale": 23.268157958984375}}, "2": {"conv": {"bias": 1.140576720237732, "kernel": 46.50312042236328}, "layer_norm": {"bias": 4.176670551300049, "scale": 20.370853424072266}}, "3": {"conv": {"bias": 0.6725863218307495, "kernel": 44.397525787353516}, "layer_norm": {"bias": 3.888174533843994, "scale": 17.53795051574707}}, "4": {"conv": {"bias": 0.6373162269592285, "kernel": 41.314056396484375}, "layer_norm": {"bias": 2.385471820831299, "scale": 16.34571647644043}}, "5": {"conv": {"bias": 0.5147221684455872, "kernel": 37.479759216308594}, "layer_norm": {"bias": 2.020900011062622, "scale": 17.064470291137695}}, "6": {"conv": {"bias": 0.4947893023490906, "kernel": 40.64780044555664}, "layer_norm": {"bias": 0.5876954793930054, "scale": 19.058603286743164}}}}, "feature_projection": {"layer_norm": {"bias": 6.461916923522949, "scale": 16.541587829589844}, "projection": {"bias": 2.1418182849884033, "kernel": 42.870758056640625}}, "masked_spec_embed": 11.914372444152832}}, "train/learning_rate": 0.00034837110433727503, "train/loss": 0.41222909092903137, "train/param_norm": 1363.845458984375, "_runtime": 79238, "_timestamp": 1659371982, "_step": 27900, "eval/loss": 0.61622154712677, "eval/wer": 0.465554185119408, "eval/cer": 0.13027084385848514, "eval/step_4k": {"_type": "table-file", "path": "media/table/eval/step_4k_4000_af4cafd73c286841ef2f.table.json", "sha256": "af4cafd73c286841ef2fce257a64583667ab5412cd6837e4b951b2f851540450", "size": 24260, "artifact_path": "wandb-client-artifact://8wsujunwuradmkiy9teyal5atub99m9zxf1gks0x84p3y9bcbrdwjhad89ar5fxnyqtn8bopmk4501qsp1nuyvfeafw7p4spyxkt5zczfl5bxv88dzgxi32ukaj3dp4j:latest/eval/step_4k.table.json", "_latest_artifact_path": "wandb-client-artifact://8wsujunwuradmkiy9teyal5atub99m9zxf1gks0x84p3y9bcbrdwjhad89ar5fxnyqtn8bopmk4501qsp1nuyvfeafw7p4spyxkt5zczfl5bxv88dzgxi32ukaj3dp4j:latest/eval/step_4k.table.json", "ncols": 2, "nrows": 50}, "eval/step_8k": {"_type": "table-file", "path": "media/table/eval/step_8k_8000_c8ddc6e8e3a9e52ebbba.table.json", "sha256": "c8ddc6e8e3a9e52ebbbae9ac6ec8bb7ae6684781548fb4ea5c57a4b03a72d655", "size": 25822, "artifact_path": "wandb-client-artifact://och35iwsgdf5e7r6ebwp8bo4p13eowiyessoabvv4cf6keb0gk4e1577q2io23l2jh4jrzauz2qyodfw4w6u4eyf8llym88t1brov3snl0vrwcrq3dalvmazc40labfc:latest/eval/step_8k.table.json", "_latest_artifact_path": "wandb-client-artifact://och35iwsgdf5e7r6ebwp8bo4p13eowiyessoabvv4cf6keb0gk4e1577q2io23l2jh4jrzauz2qyodfw4w6u4eyf8llym88t1brov3snl0vrwcrq3dalvmazc40labfc:latest/eval/step_8k.table.json", "ncols": 2, "nrows": 50}, "eval/step_12k": {"_type": "table-file", "path": "media/table/eval/step_12k_12000_697630eb77c56222f807.table.json", "sha256": "697630eb77c56222f80728b3497df5ebfe62fb1dd060725ab84ec28fcf8448a3", "size": 25625, "artifact_path": "wandb-client-artifact://ndqwcshgeo7e3e4lndtrefve494sq9zmx9n9lrqdirtgy63uloydfb95oh1cytys0xi7ugpxbq1rub03y9scmcm41ocpbk826sbeejkgr3aubqet78b4jx4d8fb3z14k:latest/eval/step_12k.table.json", "_latest_artifact_path": "wandb-client-artifact://ndqwcshgeo7e3e4lndtrefve494sq9zmx9n9lrqdirtgy63uloydfb95oh1cytys0xi7ugpxbq1rub03y9scmcm41ocpbk826sbeejkgr3aubqet78b4jx4d8fb3z14k:latest/eval/step_12k.table.json", "ncols": 2, "nrows": 50}, "eval/step_16k": {"_type": "table-file", "path": "media/table/eval/step_16k_16000_a8af015baca8352e331a.table.json", "sha256": "a8af015baca8352e331a32965ddaa7fe22e2119a1c1256e539aedfd2cb876b87", "size": 25878, "artifact_path": "wandb-client-artifact://5lnl6ihmavrhu81b4ehn69ru2j2zbl0h27qv20im44v9928o4s69g9nvnb8oni6t0b921jwgbo24pz870kgos572o5h3vkwm71kr1brda3f3ooretb0u164vptzypekc:latest/eval/step_16k.table.json", "_latest_artifact_path": "wandb-client-artifact://5lnl6ihmavrhu81b4ehn69ru2j2zbl0h27qv20im44v9928o4s69g9nvnb8oni6t0b921jwgbo24pz870kgos572o5h3vkwm71kr1brda3f3ooretb0u164vptzypekc:latest/eval/step_16k.table.json", "ncols": 2, "nrows": 50}, "eval/step_20k": {"_type": "table-file", "path": "media/table/eval/step_20k_20000_37ce73b5cf7c7934cf62.table.json", "sha256": "37ce73b5cf7c7934cf62628174d0b0af065414de25e751b9e98983313d2b352e", "size": 25998, "artifact_path": "wandb-client-artifact://dhxussc15vwxa3x78tfa9270hif7fap7tu63y3eysgx549vfxso9k96434vhylx2uacjad6ldxuh7iavo5ogtn6fqtsv0u6d60zpvy5g99bbhwo8mmzkveldckon4ngf:latest/eval/step_20k.table.json", "_latest_artifact_path": "wandb-client-artifact://dhxussc15vwxa3x78tfa9270hif7fap7tu63y3eysgx549vfxso9k96434vhylx2uacjad6ldxuh7iavo5ogtn6fqtsv0u6d60zpvy5g99bbhwo8mmzkveldckon4ngf:latest/eval/step_20k.table.json", "ncols": 2, "nrows": 50}, "eval/step_24k": {"_type": "table-file", "path": "media/table/eval/step_24k_24000_6d0ed7e79108396fc292.table.json", "sha256": "6d0ed7e79108396fc292429957faec4cfead67d1cb5df1dfb6e4064ac1b8efd8", "size": 26106, "artifact_path": "wandb-client-artifact://vc70e9r1bqlbymt7rqzpvpurafx4zsrts0ic4qbqjmtia7a4qefxrwovecenb19dn3y65kbrjbmz63f1534kl7xoa0xst4f09yfus7kr0h0wn4i1hua9wqcf4hmyqgi6:latest/eval/step_24k.table.json", "_latest_artifact_path": "wandb-client-artifact://vc70e9r1bqlbymt7rqzpvpurafx4zsrts0ic4qbqjmtia7a4qefxrwovecenb19dn3y65kbrjbmz63f1534kl7xoa0xst4f09yfus7kr0h0wn4i1hua9wqcf4hmyqgi6:latest/eval/step_24k.table.json", "ncols": 2, "nrows": 50}} \ No newline at end of file +{"train/grad_norm": 3.03125, "layer_grad_norm/": {"lm_head": {"bias": 0.015380859375, "kernel": 2.875}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 0.022216796875, "scale": 0.028564453125}, "layers": {"0": {"attention": {"k_proj": {"bias": 2.47955322265625e-05, "kernel": 0.00738525390625}, "out_proj": {"bias": 0.00830078125, "kernel": 0.05810546875}, "q_proj": {"bias": 0.00064849853515625, "kernel": 0.006683349609375}, "v_proj": {"bias": 0.010986328125, "kernel": 0.054443359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0130615234375, "kernel": 0.1513671875}, "output_dense": {"bias": 0.002777099609375, "kernel": 0.1171875}}, "final_layer_norm": {"bias": 0.054443359375, "scale": 0.1201171875}, "layer_norm": {"bias": 0.02685546875, "scale": 0.026611328125}}, "1": {"attention": {"k_proj": {"bias": 1.811981201171875e-05, "kernel": 0.0093994140625}, "out_proj": {"bias": 0.002960205078125, "kernel": 0.0269775390625}, "q_proj": {"bias": 0.001007080078125, "kernel": 0.0103759765625}, "v_proj": {"bias": 0.00579833984375, "kernel": 0.0206298828125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00531005859375, "kernel": 0.0791015625}, "output_dense": {"bias": 0.0028076171875, "kernel": 0.052490234375}}, "final_layer_norm": {"bias": 0.01171875, "scale": 0.0115966796875}, "layer_norm": {"bias": 0.0166015625, "scale": 0.0191650390625}}, "10": {"attention": {"k_proj": {"bias": 1.3947486877441406e-05, "kernel": 0.0260009765625}, "out_proj": {"bias": 0.0023193359375, "kernel": 0.03466796875}, "q_proj": {"bias": 0.00225830078125, "kernel": 0.02587890625}, "v_proj": {"bias": 0.004608154296875, "kernel": 0.032470703125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00408935546875, "kernel": 0.054443359375}, "output_dense": {"bias": 0.0022430419921875, "kernel": 0.041015625}}, "final_layer_norm": {"bias": 0.0086669921875, "scale": 0.0091552734375}, "layer_norm": {"bias": 0.014404296875, "scale": 0.017578125}}, "11": {"attention": {"k_proj": {"bias": 2.9921531677246094e-05, "kernel": 0.030517578125}, "out_proj": {"bias": 0.002288818359375, "kernel": 0.054931640625}, "q_proj": {"bias": 0.002593994140625, "kernel": 0.030029296875}, "v_proj": {"bias": 0.005126953125, "kernel": 0.04931640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.003875732421875, "kernel": 0.053955078125}, "output_dense": {"bias": 0.0022430419921875, "kernel": 0.0400390625}}, "final_layer_norm": {"bias": 0.007781982421875, "scale": 0.00830078125}, "layer_norm": {"bias": 0.0152587890625, "scale": 0.020751953125}}, "12": {"attention": {"k_proj": {"bias": 2.0742416381835938e-05, "kernel": 0.0279541015625}, "out_proj": {"bias": 0.0023040771484375, "kernel": 0.04248046875}, "q_proj": {"bias": 0.0023193359375, "kernel": 0.02734375}, "v_proj": {"bias": 0.00482177734375, "kernel": 0.0390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.004119873046875, "kernel": 0.05419921875}, "output_dense": {"bias": 0.0022430419921875, "kernel": 0.0400390625}}, "final_layer_norm": {"bias": 0.00897216796875, "scale": 0.0101318359375}, "layer_norm": {"bias": 0.0137939453125, "scale": 0.01318359375}}, "13": {"attention": {"k_proj": {"bias": 3.5762786865234375e-05, "kernel": 0.03857421875}, "out_proj": {"bias": 0.0022735595703125, "kernel": 0.0576171875}, "q_proj": {"bias": 0.003143310546875, "kernel": 0.0390625}, "v_proj": {"bias": 0.004913330078125, "kernel": 0.0517578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.004150390625, "kernel": 0.0556640625}, "output_dense": {"bias": 0.002227783203125, "kernel": 0.043701171875}}, "final_layer_norm": {"bias": 0.0087890625, "scale": 0.0087890625}, "layer_norm": {"bias": 0.0142822265625, "scale": 0.017578125}}, "14": {"attention": {"k_proj": {"bias": 2.7418136596679688e-05, "kernel": 0.0230712890625}, "out_proj": {"bias": 0.002288818359375, "kernel": 0.04443359375}, "q_proj": {"bias": 0.001922607421875, "kernel": 0.0233154296875}, "v_proj": {"bias": 0.00433349609375, "kernel": 0.0400390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.004638671875, "kernel": 0.061279296875}, "output_dense": {"bias": 0.002227783203125, "kernel": 0.047119140625}}, "final_layer_norm": {"bias": 0.011474609375, "scale": 0.0130615234375}, "layer_norm": {"bias": 0.010986328125, "scale": 0.00958251953125}}, "15": {"attention": {"k_proj": {"bias": 9.202957153320312e-05, "kernel": 0.04541015625}, "out_proj": {"bias": 0.0022430419921875, "kernel": 0.09326171875}, "q_proj": {"bias": 0.0034027099609375, "kernel": 0.043701171875}, "v_proj": {"bias": 0.00506591796875, "kernel": 0.068359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0045166015625, "kernel": 0.06103515625}, "output_dense": {"bias": 0.002197265625, "kernel": 0.052734375}}, "final_layer_norm": {"bias": 0.0093994140625, "scale": 0.0084228515625}, "layer_norm": {"bias": 0.01397705078125, "scale": 0.015869140625}}, "16": {"attention": {"k_proj": {"bias": 4.57763671875e-05, "kernel": 0.03076171875}, "out_proj": {"bias": 0.0022430419921875, "kernel": 0.05908203125}, "q_proj": {"bias": 0.0023651123046875, "kernel": 0.0296630859375}, "v_proj": {"bias": 0.00445556640625, "kernel": 0.0458984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0040283203125, "kernel": 0.05712890625}, "output_dense": {"bias": 0.002197265625, "kernel": 0.048828125}}, "final_layer_norm": {"bias": 0.00848388671875, "scale": 0.00714111328125}, "layer_norm": {"bias": 0.0120849609375, "scale": 0.01055908203125}}, "17": {"attention": {"k_proj": {"bias": 1.4781951904296875e-05, "kernel": 0.0247802734375}, "out_proj": {"bias": 0.0022735595703125, "kernel": 0.03466796875}, "q_proj": {"bias": 0.00189208984375, "kernel": 0.0240478515625}, "v_proj": {"bias": 0.00457763671875, "kernel": 0.03466796875}}, "feed_forward": {"intermediate_dense": {"bias": 0.004150390625, "kernel": 0.057861328125}, "output_dense": {"bias": 0.002197265625, "kernel": 0.0478515625}}, "final_layer_norm": {"bias": 0.0089111328125, "scale": 0.007232666015625}, "layer_norm": {"bias": 0.01287841796875, "scale": 0.01129150390625}}, "18": {"attention": {"k_proj": {"bias": 5.817413330078125e-05, "kernel": 0.038330078125}, "out_proj": {"bias": 0.002227783203125, "kernel": 0.064453125}, "q_proj": {"bias": 0.0025787353515625, "kernel": 0.035400390625}, "v_proj": {"bias": 0.004669189453125, "kernel": 0.0537109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.003875732421875, "kernel": 0.05712890625}, "output_dense": {"bias": 0.002166748046875, "kernel": 0.0478515625}}, "final_layer_norm": {"bias": 0.0081787109375, "scale": 0.00860595703125}, "layer_norm": {"bias": 0.012939453125, "scale": 0.01165771484375}}, "19": {"attention": {"k_proj": {"bias": 2.574920654296875e-05, "kernel": 0.02734375}, "out_proj": {"bias": 0.002197265625, "kernel": 0.040771484375}, "q_proj": {"bias": 0.00201416015625, "kernel": 0.0269775390625}, "v_proj": {"bias": 0.00433349609375, "kernel": 0.03662109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0038604736328125, "kernel": 0.059326171875}, "output_dense": {"bias": 0.00213623046875, "kernel": 0.05029296875}}, "final_layer_norm": {"bias": 0.00811767578125, "scale": 0.006500244140625}, "layer_norm": {"bias": 0.010986328125, "scale": 0.0106201171875}}, "2": {"attention": {"k_proj": {"bias": 1.6450881958007812e-05, "kernel": 0.014892578125}, "out_proj": {"bias": 0.0030364990234375, "kernel": 0.03466796875}, "q_proj": {"bias": 0.0017242431640625, "kernel": 0.015625}, "v_proj": {"bias": 0.0064697265625, "kernel": 0.02978515625}}, "feed_forward": {"intermediate_dense": {"bias": 0.00579833984375, "kernel": 0.09033203125}, "output_dense": {"bias": 0.0027923583984375, "kernel": 0.053466796875}}, "final_layer_norm": {"bias": 0.01318359375, "scale": 0.01312255859375}, "layer_norm": {"bias": 0.0184326171875, "scale": 0.0283203125}}, "20": {"attention": {"k_proj": {"bias": 7.331371307373047e-06, "kernel": 0.0155029296875}, "out_proj": {"bias": 0.002227783203125, "kernel": 0.0234375}, "q_proj": {"bias": 0.0011444091796875, "kernel": 0.016357421875}, "v_proj": {"bias": 0.00396728515625, "kernel": 0.02197265625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0040283203125, "kernel": 0.0634765625}, "output_dense": {"bias": 0.002166748046875, "kernel": 0.05029296875}}, "final_layer_norm": {"bias": 0.00823974609375, "scale": 0.00799560546875}, "layer_norm": {"bias": 0.009521484375, "scale": 0.0078125}}, "21": {"attention": {"k_proj": {"bias": 1.609325408935547e-05, "kernel": 0.0224609375}, "out_proj": {"bias": 0.002227783203125, "kernel": 0.0361328125}, "q_proj": {"bias": 0.0017242431640625, "kernel": 0.0228271484375}, "v_proj": {"bias": 0.00421142578125, "kernel": 0.03369140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0042724609375, "kernel": 0.068359375}, "output_dense": {"bias": 0.002166748046875, "kernel": 0.0517578125}}, "final_layer_norm": {"bias": 0.00909423828125, "scale": 0.0087890625}, "layer_norm": {"bias": 0.01031494140625, "scale": 0.01019287109375}}, "22": {"attention": {"k_proj": {"bias": 1.6689300537109375e-05, "kernel": 0.0308837890625}, "out_proj": {"bias": 0.00225830078125, "kernel": 0.0380859375}, "q_proj": {"bias": 0.0022125244140625, "kernel": 0.0306396484375}, "v_proj": {"bias": 0.0045166015625, "kernel": 0.03564453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00439453125, "kernel": 0.0703125}, "output_dense": {"bias": 0.002197265625, "kernel": 0.05126953125}}, "final_layer_norm": {"bias": 0.00958251953125, "scale": 0.00872802734375}, "layer_norm": {"bias": 0.012939453125, "scale": 0.010009765625}}, "23": {"attention": {"k_proj": {"bias": 7.05718994140625e-05, "kernel": 0.058349609375}, "out_proj": {"bias": 0.002197265625, "kernel": 0.076171875}, "q_proj": {"bias": 0.0035552978515625, "kernel": 0.055419921875}, "v_proj": {"bias": 0.005340576171875, "kernel": 0.068359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00433349609375, "kernel": 0.0751953125}, "output_dense": {"bias": 0.002105712890625, "kernel": 0.05224609375}}, "final_layer_norm": {"bias": 0.00982666015625, "scale": 0.0107421875}, "layer_norm": {"bias": 0.015869140625, "scale": 0.017578125}}, "24": {"attention": {"k_proj": {"bias": 5.340576171875e-05, "kernel": 0.059326171875}, "out_proj": {"bias": 0.00201416015625, "kernel": 0.072265625}, "q_proj": {"bias": 0.003814697265625, "kernel": 0.060546875}, "v_proj": {"bias": 0.005126953125, "kernel": 0.06787109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.004241943359375, "kernel": 0.0751953125}, "output_dense": {"bias": 0.001922607421875, "kernel": 0.04541015625}}, "final_layer_norm": {"bias": 0.0093994140625, "scale": 0.0087890625}, "layer_norm": {"bias": 0.016357421875, "scale": 0.0177001953125}}, "25": {"attention": {"k_proj": {"bias": 3.5762786865234375e-05, "kernel": 0.04296875}, "out_proj": {"bias": 0.0019378662109375, "kernel": 0.05322265625}, "q_proj": {"bias": 0.0028839111328125, "kernel": 0.042724609375}, "v_proj": {"bias": 0.00457763671875, "kernel": 0.052734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00396728515625, "kernel": 0.072265625}, "output_dense": {"bias": 0.00185394287109375, "kernel": 0.044677734375}}, "final_layer_norm": {"bias": 0.00909423828125, "scale": 0.0081787109375}, "layer_norm": {"bias": 0.0135498046875, "scale": 0.018310546875}}, "26": {"attention": {"k_proj": {"bias": 3.24249267578125e-05, "kernel": 0.045654296875}, "out_proj": {"bias": 0.00185394287109375, "kernel": 0.047607421875}, "q_proj": {"bias": 0.0030364990234375, "kernel": 0.04638671875}, "v_proj": {"bias": 0.004425048828125, "kernel": 0.04736328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0035858154296875, "kernel": 0.0634765625}, "output_dense": {"bias": 0.001800537109375, "kernel": 0.043212890625}}, "final_layer_norm": {"bias": 0.0081787109375, "scale": 0.008544921875}, "layer_norm": {"bias": 0.01312255859375, "scale": 0.01806640625}}, "27": {"attention": {"k_proj": {"bias": 6.580352783203125e-05, "kernel": 0.07421875}, "out_proj": {"bias": 0.00167083740234375, "kernel": 0.0703125}, "q_proj": {"bias": 0.00457763671875, "kernel": 0.072265625}, "v_proj": {"bias": 0.00469970703125, "kernel": 0.0693359375}}, "feed_forward": {"intermediate_dense": {"bias": 0.003692626953125, "kernel": 0.0634765625}, "output_dense": {"bias": 0.0016326904296875, "kernel": 0.04150390625}}, "final_layer_norm": {"bias": 0.0087890625, "scale": 0.0084228515625}, "layer_norm": {"bias": 0.017822265625, "scale": 0.024658203125}}, "28": {"attention": {"k_proj": {"bias": 5.340576171875e-05, "kernel": 0.059814453125}, "out_proj": {"bias": 0.0015106201171875, "kernel": 0.0634765625}, "q_proj": {"bias": 0.0036468505859375, "kernel": 0.06103515625}, "v_proj": {"bias": 0.0042724609375, "kernel": 0.0634765625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0034027099609375, "kernel": 0.060302734375}, "output_dense": {"bias": 0.001495361328125, "kernel": 0.04052734375}}, "final_layer_norm": {"bias": 0.008544921875, "scale": 0.00836181640625}, "layer_norm": {"bias": 0.0147705078125, "scale": 0.0191650390625}}, "29": {"attention": {"k_proj": {"bias": 6.079673767089844e-05, "kernel": 0.05908203125}, "out_proj": {"bias": 0.00140380859375, "kernel": 0.05712890625}, "q_proj": {"bias": 0.0032958984375, "kernel": 0.06005859375}, "v_proj": {"bias": 0.004119873046875, "kernel": 0.059814453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.003509521484375, "kernel": 0.06689453125}, "output_dense": {"bias": 0.0013275146484375, "kernel": 0.0390625}}, "final_layer_norm": {"bias": 0.0084228515625, "scale": 0.00701904296875}, "layer_norm": {"bias": 0.01458740234375, "scale": 0.014892578125}}, "3": {"attention": {"k_proj": {"bias": 4.100799560546875e-05, "kernel": 0.0223388671875}, "out_proj": {"bias": 0.002960205078125, "kernel": 0.06298828125}, "q_proj": {"bias": 0.0022430419921875, "kernel": 0.0216064453125}, "v_proj": {"bias": 0.005828857421875, "kernel": 0.048828125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00567626953125, "kernel": 0.080078125}, "output_dense": {"bias": 0.002716064453125, "kernel": 0.05078125}}, "final_layer_norm": {"bias": 0.01312255859375, "scale": 0.010986328125}, "layer_norm": {"bias": 0.0157470703125, "scale": 0.02392578125}}, "30": {"attention": {"k_proj": {"bias": 4.4345855712890625e-05, "kernel": 0.0498046875}, "out_proj": {"bias": 0.00128173828125, "kernel": 0.05224609375}, "q_proj": {"bias": 0.0028228759765625, "kernel": 0.0517578125}, "v_proj": {"bias": 0.00347900390625, "kernel": 0.054931640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.003204345703125, "kernel": 0.0634765625}, "output_dense": {"bias": 0.0012054443359375, "kernel": 0.034423828125}}, "final_layer_norm": {"bias": 0.0078125, "scale": 0.0064697265625}, "layer_norm": {"bias": 0.0115966796875, "scale": 0.01806640625}}, "31": {"attention": {"k_proj": {"bias": 4.6253204345703125e-05, "kernel": 0.049560546875}, "out_proj": {"bias": 0.00116729736328125, "kernel": 0.0439453125}, "q_proj": {"bias": 0.00311279296875, "kernel": 0.052734375}, "v_proj": {"bias": 0.003021240234375, "kernel": 0.04736328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00286865234375, "kernel": 0.05517578125}, "output_dense": {"bias": 0.0010986328125, "kernel": 0.031494140625}}, "final_layer_norm": {"bias": 0.006683349609375, "scale": 0.0072021484375}, "layer_norm": {"bias": 0.0113525390625, "scale": 0.0213623046875}}, "32": {"attention": {"k_proj": {"bias": 3.8623809814453125e-05, "kernel": 0.04150390625}, "out_proj": {"bias": 0.0010833740234375, "kernel": 0.0380859375}, "q_proj": {"bias": 0.002532958984375, "kernel": 0.04248046875}, "v_proj": {"bias": 0.0029144287109375, "kernel": 0.042236328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.002899169921875, "kernel": 0.055908203125}, "output_dense": {"bias": 0.00099945068359375, "kernel": 0.028564453125}}, "final_layer_norm": {"bias": 0.006927490234375, "scale": 0.00714111328125}, "layer_norm": {"bias": 0.01031494140625, "scale": 0.0130615234375}}, "33": {"attention": {"k_proj": {"bias": 4.291534423828125e-05, "kernel": 0.0498046875}, "out_proj": {"bias": 0.00096893310546875, "kernel": 0.036376953125}, "q_proj": {"bias": 0.0029296875, "kernel": 0.04931640625}, "v_proj": {"bias": 0.0026702880859375, "kernel": 0.040283203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.0025482177734375, "kernel": 0.049072265625}, "output_dense": {"bias": 0.000896453857421875, "kernel": 0.026611328125}}, "final_layer_norm": {"bias": 0.00653076171875, "scale": 0.0054931640625}, "layer_norm": {"bias": 0.01007080078125, "scale": 0.018310546875}}, "34": {"attention": {"k_proj": {"bias": 6.29425048828125e-05, "kernel": 0.05322265625}, "out_proj": {"bias": 0.0008392333984375, "kernel": 0.03515625}, "q_proj": {"bias": 0.003326416015625, "kernel": 0.055419921875}, "v_proj": {"bias": 0.0023193359375, "kernel": 0.04150390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.002197265625, "kernel": 0.04052734375}, "output_dense": {"bias": 0.0007781982421875, "kernel": 0.0234375}}, "final_layer_norm": {"bias": 0.006103515625, "scale": 0.005828857421875}, "layer_norm": {"bias": 0.0107421875, "scale": 0.0125732421875}}, "35": {"attention": {"k_proj": {"bias": 4.100799560546875e-05, "kernel": 0.036865234375}, "out_proj": {"bias": 0.000736236572265625, "kernel": 0.03662109375}, "q_proj": {"bias": 0.0021820068359375, "kernel": 0.0390625}, "v_proj": {"bias": 0.001922607421875, "kernel": 0.037109375}}, "feed_forward": {"intermediate_dense": {"bias": 0.0017852783203125, "kernel": 0.0322265625}, "output_dense": {"bias": 0.00069427490234375, "kernel": 0.020751953125}}, "final_layer_norm": {"bias": 0.00457763671875, "scale": 0.0048828125}, "layer_norm": {"bias": 0.009033203125, "scale": 0.0096435546875}}, "36": {"attention": {"k_proj": {"bias": 6.532669067382812e-05, "kernel": 0.02978515625}, "out_proj": {"bias": 0.00067138671875, "kernel": 0.028564453125}, "q_proj": {"bias": 0.00180816650390625, "kernel": 0.0306396484375}, "v_proj": {"bias": 0.0016021728515625, "kernel": 0.0289306640625}}, "feed_forward": {"intermediate_dense": {"bias": 0.0015411376953125, "kernel": 0.028076171875}, "output_dense": {"bias": 0.000640869140625, "kernel": 0.0172119140625}}, "final_layer_norm": {"bias": 0.003753662109375, "scale": 0.0033111572265625}, "layer_norm": {"bias": 0.006988525390625, "scale": 0.00897216796875}}, "37": {"attention": {"k_proj": {"bias": 4.5299530029296875e-05, "kernel": 0.0322265625}, "out_proj": {"bias": 0.00061798095703125, "kernel": 0.025390625}, "q_proj": {"bias": 0.0020904541015625, "kernel": 0.03466796875}, "v_proj": {"bias": 0.00153350830078125, "kernel": 0.0279541015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.001495361328125, "kernel": 0.02783203125}, "output_dense": {"bias": 0.000583648681640625, "kernel": 0.0166015625}}, "final_layer_norm": {"bias": 0.003692626953125, "scale": 0.003143310546875}, "layer_norm": {"bias": 0.007080078125, "scale": 0.007781982421875}}, "38": {"attention": {"k_proj": {"bias": 5.173683166503906e-05, "kernel": 0.03564453125}, "out_proj": {"bias": 0.0005645751953125, "kernel": 0.024169921875}, "q_proj": {"bias": 0.0020751953125, "kernel": 0.03564453125}, "v_proj": {"bias": 0.0013427734375, "kernel": 0.0262451171875}}, "feed_forward": {"intermediate_dense": {"bias": 0.001373291015625, "kernel": 0.026123046875}, "output_dense": {"bias": 0.0005340576171875, "kernel": 0.01611328125}}, "final_layer_norm": {"bias": 0.0034332275390625, "scale": 0.003936767578125}, "layer_norm": {"bias": 0.0064697265625, "scale": 0.00726318359375}}, "39": {"attention": {"k_proj": {"bias": 3.314018249511719e-05, "kernel": 0.033203125}, "out_proj": {"bias": 0.000507354736328125, "kernel": 0.0225830078125}, "q_proj": {"bias": 0.002044677734375, "kernel": 0.037353515625}, "v_proj": {"bias": 0.00128173828125, "kernel": 0.026611328125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00127410888671875, "kernel": 0.02490234375}, "output_dense": {"bias": 0.0004749298095703125, "kernel": 0.01531982421875}}, "final_layer_norm": {"bias": 0.0033111572265625, "scale": 0.0036468505859375}, "layer_norm": {"bias": 0.006591796875, "scale": 0.00799560546875}}, "4": {"attention": {"k_proj": {"bias": 5.8650970458984375e-05, "kernel": 0.026611328125}, "out_proj": {"bias": 0.002838134765625, "kernel": 0.0810546875}, "q_proj": {"bias": 0.0025634765625, "kernel": 0.0272216796875}, "v_proj": {"bias": 0.0054931640625, "kernel": 0.061767578125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00531005859375, "kernel": 0.0712890625}, "output_dense": {"bias": 0.002655029296875, "kernel": 0.052734375}}, "final_layer_norm": {"bias": 0.0115966796875, "scale": 0.01043701171875}, "layer_norm": {"bias": 0.01513671875, "scale": 0.0185546875}}, "40": {"attention": {"k_proj": {"bias": 2.5153160095214844e-05, "kernel": 0.0234375}, "out_proj": {"bias": 0.0004730224609375, "kernel": 0.017333984375}, "q_proj": {"bias": 0.0013427734375, "kernel": 0.024658203125}, "v_proj": {"bias": 0.00102996826171875, "kernel": 0.01904296875}}, "feed_forward": {"intermediate_dense": {"bias": 0.00119781494140625, "kernel": 0.0224609375}, "output_dense": {"bias": 0.000446319580078125, "kernel": 0.01324462890625}}, "final_layer_norm": {"bias": 0.0031890869140625, "scale": 0.005340576171875}, "layer_norm": {"bias": 0.004730224609375, "scale": 0.0078125}}, "41": {"attention": {"k_proj": {"bias": 4.076957702636719e-05, "kernel": 0.03515625}, "out_proj": {"bias": 0.000423431396484375, "kernel": 0.02197265625}, "q_proj": {"bias": 0.0017242431640625, "kernel": 0.036376953125}, "v_proj": {"bias": 0.00121307373046875, "kernel": 0.0279541015625}}, "feed_forward": {"intermediate_dense": {"bias": 0.001068115234375, "kernel": 0.02294921875}, "output_dense": {"bias": 0.0003871917724609375, "kernel": 0.01470947265625}}, "final_layer_norm": {"bias": 0.0031280517578125, "scale": 0.005126953125}, "layer_norm": {"bias": 0.006256103515625, "scale": 0.0076904296875}}, "42": {"attention": {"k_proj": {"bias": 1.0192394256591797e-05, "kernel": 0.011474609375}, "out_proj": {"bias": 0.000392913818359375, "kernel": 0.0128173828125}, "q_proj": {"bias": 0.00067138671875, "kernel": 0.01287841796875}, "v_proj": {"bias": 0.0008087158203125, "kernel": 0.0146484375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00106048583984375, "kernel": 0.023681640625}, "output_dense": {"bias": 0.0003452301025390625, "kernel": 0.01416015625}}, "final_layer_norm": {"bias": 0.00341796875, "scale": 0.0064697265625}, "layer_norm": {"bias": 0.002899169921875, "scale": 0.007354736328125}}, "43": {"attention": {"k_proj": {"bias": 5.27501106262207e-06, "kernel": 0.0062255859375}, "out_proj": {"bias": 0.0003528594970703125, "kernel": 0.00946044921875}, "q_proj": {"bias": 0.00042724609375, "kernel": 0.00726318359375}, "v_proj": {"bias": 0.000629425048828125, "kernel": 0.01031494140625}}, "feed_forward": {"intermediate_dense": {"bias": 0.00096893310546875, "kernel": 0.023193359375}, "output_dense": {"bias": 0.000308990478515625, "kernel": 0.0137939453125}}, "final_layer_norm": {"bias": 0.00311279296875, "scale": 0.00439453125}, "layer_norm": {"bias": 0.001953125, "scale": 0.003448486328125}}, "44": {"attention": {"k_proj": {"bias": 8.702278137207031e-06, "kernel": 0.00665283203125}, "out_proj": {"bias": 0.0003147125244140625, "kernel": 0.0091552734375}, "q_proj": {"bias": 0.000476837158203125, "kernel": 0.0081787109375}, "v_proj": {"bias": 0.00058746337890625, "kernel": 0.01025390625}}, "feed_forward": {"intermediate_dense": {"bias": 0.000896453857421875, "kernel": 0.02392578125}, "output_dense": {"bias": 0.000274658203125, "kernel": 0.01470947265625}}, "final_layer_norm": {"bias": 0.0032501220703125, "scale": 0.003631591796875}, "layer_norm": {"bias": 0.00213623046875, "scale": 0.0026092529296875}}, "45": {"attention": {"k_proj": {"bias": 7.271766662597656e-06, "kernel": 0.00537109375}, "out_proj": {"bias": 0.000286102294921875, "kernel": 0.0087890625}, "q_proj": {"bias": 0.0003948211669921875, "kernel": 0.00677490234375}, "v_proj": {"bias": 0.00052642822265625, "kernel": 0.0091552734375}}, "feed_forward": {"intermediate_dense": {"bias": 0.00080108642578125, "kernel": 0.02099609375}, "output_dense": {"bias": 0.000255584716796875, "kernel": 0.0155029296875}}, "final_layer_norm": {"bias": 0.00286865234375, "scale": 0.0033416748046875}, "layer_norm": {"bias": 0.001922607421875, "scale": 0.00238037109375}}, "46": {"attention": {"k_proj": {"bias": 1.4066696166992188e-05, "kernel": 0.005126953125}, "out_proj": {"bias": 0.00026702880859375, "kernel": 0.0096435546875}, "q_proj": {"bias": 0.000362396240234375, "kernel": 0.00604248046875}, "v_proj": {"bias": 0.0005340576171875, "kernel": 0.00970458984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.000732421875, "kernel": 0.018310546875}, "output_dense": {"bias": 0.00023651123046875, "kernel": 0.0250244140625}}, "final_layer_norm": {"bias": 0.003326416015625, "scale": 0.00408935546875}, "layer_norm": {"bias": 0.0022125244140625, "scale": 0.00274658203125}}, "47": {"attention": {"k_proj": {"bias": 0.000148773193359375, "kernel": 0.00494384765625}, "out_proj": {"bias": 0.000240325927734375, "kernel": 0.02099609375}, "q_proj": {"bias": 0.00021076202392578125, "kernel": 0.0036163330078125}, "v_proj": {"bias": 0.0004291534423828125, "kernel": 0.00830078125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00049591064453125, "kernel": 0.011474609375}, "output_dense": {"bias": 0.00022792816162109375, "kernel": 0.05615234375}}, "final_layer_norm": {"bias": 0.004180908203125, "scale": 0.005828857421875}, "layer_norm": {"bias": 0.002593994140625, "scale": 0.0037994384765625}}, "5": {"attention": {"k_proj": {"bias": 1.3470649719238281e-05, "kernel": 0.022705078125}, "out_proj": {"bias": 0.002899169921875, "kernel": 0.03564453125}, "q_proj": {"bias": 0.002044677734375, "kernel": 0.0225830078125}, "v_proj": {"bias": 0.0057373046875, "kernel": 0.033203125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00555419921875, "kernel": 0.068359375}, "output_dense": {"bias": 0.002685546875, "kernel": 0.052001953125}}, "final_layer_norm": {"bias": 0.013427734375, "scale": 0.012451171875}, "layer_norm": {"bias": 0.016357421875, "scale": 0.0191650390625}}, "6": {"attention": {"k_proj": {"bias": 4.6253204345703125e-05, "kernel": 0.0380859375}, "out_proj": {"bias": 0.002716064453125, "kernel": 0.0751953125}, "q_proj": {"bias": 0.003326416015625, "kernel": 0.03515625}, "v_proj": {"bias": 0.0059814453125, "kernel": 0.064453125}}, "feed_forward": {"intermediate_dense": {"bias": 0.00537109375, "kernel": 0.0703125}, "output_dense": {"bias": 0.002532958984375, "kernel": 0.05078125}}, "final_layer_norm": {"bias": 0.011474609375, "scale": 0.01104736328125}, "layer_norm": {"bias": 0.0166015625, "scale": 0.016845703125}}, "7": {"attention": {"k_proj": {"bias": 5.4836273193359375e-05, "kernel": 0.0380859375}, "out_proj": {"bias": 0.00262451171875, "kernel": 0.080078125}, "q_proj": {"bias": 0.0031585693359375, "kernel": 0.0361328125}, "v_proj": {"bias": 0.0054931640625, "kernel": 0.0654296875}}, "feed_forward": {"intermediate_dense": {"bias": 0.005035400390625, "kernel": 0.0693359375}, "output_dense": {"bias": 0.002471923828125, "kernel": 0.050537109375}}, "final_layer_norm": {"bias": 0.010986328125, "scale": 0.0087890625}, "layer_norm": {"bias": 0.016357421875, "scale": 0.022216796875}}, "8": {"attention": {"k_proj": {"bias": 4.744529724121094e-05, "kernel": 0.035400390625}, "out_proj": {"bias": 0.0025177001953125, "kernel": 0.064453125}, "q_proj": {"bias": 0.002960205078125, "kernel": 0.03369140625}, "v_proj": {"bias": 0.0052490234375, "kernel": 0.0546875}}, "feed_forward": {"intermediate_dense": {"bias": 0.005035400390625, "kernel": 0.0693359375}, "output_dense": {"bias": 0.002410888671875, "kernel": 0.0498046875}}, "final_layer_norm": {"bias": 0.01165771484375, "scale": 0.0103759765625}, "layer_norm": {"bias": 0.0152587890625, "scale": 0.020751953125}}, "9": {"attention": {"k_proj": {"bias": 7.295608520507812e-05, "kernel": 0.043212890625}, "out_proj": {"bias": 0.0023193359375, "kernel": 0.09375}, "q_proj": {"bias": 0.003143310546875, "kernel": 0.04150390625}, "v_proj": {"bias": 0.0048828125, "kernel": 0.083984375}}, "feed_forward": {"intermediate_dense": {"bias": 0.004119873046875, "kernel": 0.0615234375}, "output_dense": {"bias": 0.00225830078125, "kernel": 0.044677734375}}, "final_layer_norm": {"bias": 0.0089111328125, "scale": 0.00897216796875}, "layer_norm": {"bias": 0.0142822265625, "scale": 0.017822265625}}}, "pos_conv_embed": {"conv": {"bias": 0.00823974609375, "weight_g": 0.0260009765625, "weight_v": 0.08984375}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "1": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "2": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "3": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "4": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "5": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}, "6": {"conv": {"bias": 0.0, "kernel": 0.0}, "layer_norm": {"bias": 0.0, "scale": 0.0}}}}, "feature_projection": {"layer_norm": {"bias": 0.037109375, "scale": 0.06787109375}, "projection": {"bias": 0.0126953125, "kernel": 0.2734375}}, "masked_spec_embed": 0.0}}, "layer_param_norm/": {"lm_head": {"bias": 0.5428199768066406, "kernel": 39.53935241699219}, "wav2vec2": {"encoder": {"layer_norm": {"bias": 44.34605026245117, "scale": 73.3198471069336}, "layers": {"0": {"attention": {"k_proj": {"bias": 0.8326878547668457, "kernel": 33.13389587402344}, "out_proj": {"bias": 1.9269423484802246, "kernel": 29.364213943481445}, "q_proj": {"bias": 2.1786375045776367, "kernel": 32.84344482421875}, "v_proj": {"bias": 0.5624852776527405, "kernel": 29.58292579650879}}, "feed_forward": {"intermediate_dense": {"bias": 2.675503730773926, "kernel": 116.23121643066406}, "output_dense": {"bias": 1.303141474723816, "kernel": 112.41329956054688}}, "final_layer_norm": {"bias": 1.6423965692520142, "scale": 19.779216766357422}, "layer_norm": {"bias": 2.610494375228882, "scale": 13.602749824523926}}, "1": {"attention": {"k_proj": {"bias": 0.9677044749259949, "kernel": 47.55170440673828}, "out_proj": {"bias": 1.4074771404266357, "kernel": 48.71104431152344}, "q_proj": {"bias": 3.487532615661621, "kernel": 47.43404769897461}, "v_proj": {"bias": 0.40163999795913696, "kernel": 46.684722900390625}}, "feed_forward": {"intermediate_dense": {"bias": 2.579317092895508, "kernel": 109.14014434814453}, "output_dense": {"bias": 0.9453935623168945, "kernel": 95.64762878417969}}, "final_layer_norm": {"bias": 1.4216214418411255, "scale": 19.20606803894043}, "layer_norm": {"bias": 1.7669947147369385, "scale": 15.537803649902344}}, "10": {"attention": {"k_proj": {"bias": 1.0214643478393555, "kernel": 55.45654296875}, "out_proj": {"bias": 1.3683927059173584, "kernel": 57.103294372558594}, "q_proj": {"bias": 2.743344306945801, "kernel": 55.54225158691406}, "v_proj": {"bias": 0.43790698051452637, "kernel": 56.592411041259766}}, "feed_forward": {"intermediate_dense": {"bias": 2.456008195877075, "kernel": 112.09131622314453}, "output_dense": {"bias": 0.7022168636322021, "kernel": 104.06380462646484}}, "final_layer_norm": {"bias": 2.2165937423706055, "scale": 21.315162658691406}, "layer_norm": {"bias": 1.719329595565796, "scale": 19.529136657714844}}, "11": {"attention": {"k_proj": {"bias": 1.132107138633728, "kernel": 56.78343963623047}, "out_proj": {"bias": 1.2154752016067505, "kernel": 58.082176208496094}, "q_proj": {"bias": 2.707162618637085, "kernel": 56.87295150756836}, "v_proj": {"bias": 0.5470556020736694, "kernel": 57.515804290771484}}, "feed_forward": {"intermediate_dense": {"bias": 2.4912850856781006, "kernel": 113.05030822753906}, "output_dense": {"bias": 0.6732709407806396, "kernel": 105.67353820800781}}, "final_layer_norm": {"bias": 2.2047781944274902, "scale": 21.52802848815918}, "layer_norm": {"bias": 1.7760242223739624, "scale": 20.865413665771484}}, "12": {"attention": {"k_proj": {"bias": 1.0565543174743652, "kernel": 57.1015510559082}, "out_proj": {"bias": 1.1852445602416992, "kernel": 57.51365280151367}, "q_proj": {"bias": 2.5627965927124023, "kernel": 57.20713806152344}, "v_proj": {"bias": 0.5132226943969727, "kernel": 57.104278564453125}}, "feed_forward": {"intermediate_dense": {"bias": 2.5303268432617188, "kernel": 114.10084533691406}, "output_dense": {"bias": 0.6569243669509888, "kernel": 107.61695861816406}}, "final_layer_norm": {"bias": 2.1192846298217773, "scale": 21.54072380065918}, "layer_norm": {"bias": 1.8206603527069092, "scale": 20.66368865966797}}, "13": {"attention": {"k_proj": {"bias": 1.109609842300415, "kernel": 59.139137268066406}, "out_proj": {"bias": 1.204558253288269, "kernel": 57.836463928222656}, "q_proj": {"bias": 2.508016586303711, "kernel": 59.25297546386719}, "v_proj": {"bias": 0.49843183159828186, "kernel": 57.290313720703125}}, "feed_forward": {"intermediate_dense": {"bias": 2.553337574005127, "kernel": 115.32876586914062}, "output_dense": {"bias": 0.6907990574836731, "kernel": 108.72101593017578}}, "final_layer_norm": {"bias": 2.0585994720458984, "scale": 21.673809051513672}, "layer_norm": {"bias": 1.9956371784210205, "scale": 21.9807186126709}}, "14": {"attention": {"k_proj": {"bias": 1.0712106227874756, "kernel": 58.71227264404297}, "out_proj": {"bias": 1.3670698404312134, "kernel": 55.26051330566406}, "q_proj": {"bias": 2.6314339637756348, "kernel": 58.89701843261719}, "v_proj": {"bias": 0.46164584159851074, "kernel": 54.09172821044922}}, "feed_forward": {"intermediate_dense": {"bias": 2.5965352058410645, "kernel": 116.16297912597656}, "output_dense": {"bias": 0.746627926826477, "kernel": 110.53941345214844}}, "final_layer_norm": {"bias": 2.130291223526001, "scale": 21.923263549804688}, "layer_norm": {"bias": 2.070974826812744, "scale": 21.296051025390625}}, "15": {"attention": {"k_proj": {"bias": 1.1247915029525757, "kernel": 59.72203826904297}, "out_proj": {"bias": 1.4958913326263428, "kernel": 58.015098571777344}, "q_proj": {"bias": 2.660292625427246, "kernel": 60.07173156738281}, "v_proj": {"bias": 0.598690390586853, "kernel": 57.066253662109375}}, "feed_forward": {"intermediate_dense": {"bias": 2.634087085723877, "kernel": 116.24002075195312}, "output_dense": {"bias": 0.9244471192359924, "kernel": 112.09469604492188}}, "final_layer_norm": {"bias": 2.292771339416504, "scale": 21.723724365234375}, "layer_norm": {"bias": 2.3929615020751953, "scale": 23.508939743041992}}, "16": {"attention": {"k_proj": {"bias": 1.0540928840637207, "kernel": 59.11586380004883}, "out_proj": {"bias": 1.4032483100891113, "kernel": 56.60514831542969}, "q_proj": {"bias": 2.7260608673095703, "kernel": 59.254310607910156}, "v_proj": {"bias": 0.4804057478904724, "kernel": 55.42608642578125}}, "feed_forward": {"intermediate_dense": {"bias": 2.6082377433776855, "kernel": 116.49190521240234}, "output_dense": {"bias": 0.9517278671264648, "kernel": 112.44628143310547}}, "final_layer_norm": {"bias": 2.338442802429199, "scale": 22.12885284423828}, "layer_norm": {"bias": 2.233637571334839, "scale": 21.41533851623535}}, "17": {"attention": {"k_proj": {"bias": 0.9837132692337036, "kernel": 58.42851638793945}, "out_proj": {"bias": 1.3364170789718628, "kernel": 55.23937225341797}, "q_proj": {"bias": 2.8030295372009277, "kernel": 58.6165771484375}, "v_proj": {"bias": 0.5120510458946228, "kernel": 54.12096405029297}}, "feed_forward": {"intermediate_dense": {"bias": 2.5946855545043945, "kernel": 117.4000244140625}, "output_dense": {"bias": 0.9760332107543945, "kernel": 112.84657287597656}}, "final_layer_norm": {"bias": 2.3310747146606445, "scale": 22.577356338500977}, "layer_norm": {"bias": 2.1474575996398926, "scale": 19.647247314453125}}, "18": {"attention": {"k_proj": {"bias": 1.1168575286865234, "kernel": 60.995887756347656}, "out_proj": {"bias": 1.4672904014587402, "kernel": 57.6978874206543}, "q_proj": {"bias": 2.7197160720825195, "kernel": 61.35331726074219}, "v_proj": {"bias": 0.5715624690055847, "kernel": 56.36853790283203}}, "feed_forward": {"intermediate_dense": {"bias": 2.6232266426086426, "kernel": 117.55758666992188}, "output_dense": {"bias": 1.1174592971801758, "kernel": 114.36212158203125}}, "final_layer_norm": {"bias": 2.5425472259521484, "scale": 22.800260543823242}, "layer_norm": {"bias": 2.3893299102783203, "scale": 22.76236915588379}}, "19": {"attention": {"k_proj": {"bias": 1.0208070278167725, "kernel": 59.06885528564453}, "out_proj": {"bias": 1.4778454303741455, "kernel": 56.21014404296875}, "q_proj": {"bias": 2.8918728828430176, "kernel": 59.33289337158203}, "v_proj": {"bias": 0.5297054052352905, "kernel": 54.7474365234375}}, "feed_forward": {"intermediate_dense": {"bias": 2.669632911682129, "kernel": 117.91584777832031}, "output_dense": {"bias": 1.2106082439422607, "kernel": 115.20211791992188}}, "final_layer_norm": {"bias": 2.573245048522949, "scale": 23.14816665649414}, "layer_norm": {"bias": 2.252002477645874, "scale": 21.209671020507812}}, "2": {"attention": {"k_proj": {"bias": 1.0454318523406982, "kernel": 54.686676025390625}, "out_proj": {"bias": 1.2734014987945557, "kernel": 51.490577697753906}, "q_proj": {"bias": 3.575977325439453, "kernel": 54.497100830078125}, "v_proj": {"bias": 0.3959110677242279, "kernel": 50.49995422363281}}, "feed_forward": {"intermediate_dense": {"bias": 2.594606399536133, "kernel": 113.68925476074219}, "output_dense": {"bias": 0.8178951740264893, "kernel": 99.30268859863281}}, "final_layer_norm": {"bias": 1.4472203254699707, "scale": 21.849597930908203}, "layer_norm": {"bias": 1.584557056427002, "scale": 18.51214599609375}}, "20": {"attention": {"k_proj": {"bias": 0.9627430438995361, "kernel": 58.09296798706055}, "out_proj": {"bias": 1.5276570320129395, "kernel": 54.73149871826172}, "q_proj": {"bias": 2.841951847076416, "kernel": 58.31769561767578}, "v_proj": {"bias": 0.501281201839447, "kernel": 53.01441955566406}}, "feed_forward": {"intermediate_dense": {"bias": 2.6359379291534424, "kernel": 118.42901611328125}, "output_dense": {"bias": 1.3258147239685059, "kernel": 115.80030059814453}}, "final_layer_norm": {"bias": 2.5501017570495605, "scale": 23.993675231933594}, "layer_norm": {"bias": 2.172488212585449, "scale": 20.22334098815918}}, "21": {"attention": {"k_proj": {"bias": 1.0651466846466064, "kernel": 59.5225830078125}, "out_proj": {"bias": 1.5949819087982178, "kernel": 55.423763275146484}, "q_proj": {"bias": 2.7778890132904053, "kernel": 59.900787353515625}, "v_proj": {"bias": 0.6249045133590698, "kernel": 53.91331100463867}}, "feed_forward": {"intermediate_dense": {"bias": 2.67600154876709, "kernel": 118.85958862304688}, "output_dense": {"bias": 1.4759166240692139, "kernel": 116.21441650390625}}, "final_layer_norm": {"bias": 2.586477518081665, "scale": 23.85940933227539}, "layer_norm": {"bias": 2.245365619659424, "scale": 20.780105590820312}}, "22": {"attention": {"k_proj": {"bias": 1.108874797821045, "kernel": 60.636451721191406}, "out_proj": {"bias": 1.5651140213012695, "kernel": 55.89690017700195}, "q_proj": {"bias": 2.7927298545837402, "kernel": 60.867923736572266}, "v_proj": {"bias": 0.6132253408432007, "kernel": 55.12964630126953}}, "feed_forward": {"intermediate_dense": {"bias": 2.5806026458740234, "kernel": 118.8563003540039}, "output_dense": {"bias": 1.5526583194732666, "kernel": 115.60234069824219}}, "final_layer_norm": {"bias": 2.440950632095337, "scale": 23.38101577758789}, "layer_norm": {"bias": 2.2541518211364746, "scale": 19.850492477416992}}, "23": {"attention": {"k_proj": {"bias": 1.1803855895996094, "kernel": 63.56022262573242}, "out_proj": {"bias": 1.8045732975006104, "kernel": 58.528839111328125}, "q_proj": {"bias": 2.795893669128418, "kernel": 63.81403350830078}, "v_proj": {"bias": 0.760036289691925, "kernel": 58.50494384765625}}, "feed_forward": {"intermediate_dense": {"bias": 2.5427958965301514, "kernel": 119.45539855957031}, "output_dense": {"bias": 1.5261703729629517, "kernel": 117.31689453125}}, "final_layer_norm": {"bias": 2.8512122631073, "scale": 23.814239501953125}, "layer_norm": {"bias": 2.7235336303710938, "scale": 23.143463134765625}}, "24": {"attention": {"k_proj": {"bias": 1.2392760515213013, "kernel": 63.35820388793945}, "out_proj": {"bias": 1.8753303289413452, "kernel": 60.957786560058594}, "q_proj": {"bias": 2.969503402709961, "kernel": 63.347145080566406}, "v_proj": {"bias": 0.7420969009399414, "kernel": 60.3218994140625}}, "feed_forward": {"intermediate_dense": {"bias": 2.6453800201416016, "kernel": 118.71673583984375}, "output_dense": {"bias": 1.5667115449905396, "kernel": 119.97584533691406}}, "final_layer_norm": {"bias": 2.882058620452881, "scale": 23.88472557067871}, "layer_norm": {"bias": 2.4996657371520996, "scale": 22.191129684448242}}, "25": {"attention": {"k_proj": {"bias": 1.1723105907440186, "kernel": 62.71778106689453}, "out_proj": {"bias": 1.5817502737045288, "kernel": 57.569122314453125}, "q_proj": {"bias": 2.969485282897949, "kernel": 62.733360290527344}, "v_proj": {"bias": 0.7101114988327026, "kernel": 57.584442138671875}}, "feed_forward": {"intermediate_dense": {"bias": 2.5500712394714355, "kernel": 119.05323791503906}, "output_dense": {"bias": 1.3005712032318115, "kernel": 119.923583984375}}, "final_layer_norm": {"bias": 2.696660280227661, "scale": 24.387502670288086}, "layer_norm": {"bias": 2.4880142211914062, "scale": 20.928028106689453}}, "26": {"attention": {"k_proj": {"bias": 1.127084493637085, "kernel": 61.64911651611328}, "out_proj": {"bias": 1.4543545246124268, "kernel": 57.30519485473633}, "q_proj": {"bias": 3.0726661682128906, "kernel": 61.54557800292969}, "v_proj": {"bias": 0.5901228189468384, "kernel": 57.70402908325195}}, "feed_forward": {"intermediate_dense": {"bias": 2.644829750061035, "kernel": 118.23487091064453}, "output_dense": {"bias": 1.250557541847229, "kernel": 116.10880279541016}}, "final_layer_norm": {"bias": 2.3120343685150146, "scale": 23.21187973022461}, "layer_norm": {"bias": 2.371980905532837, "scale": 20.954845428466797}}, "27": {"attention": {"k_proj": {"bias": 1.317265272140503, "kernel": 64.05865478515625}, "out_proj": {"bias": 1.6727519035339355, "kernel": 60.77666473388672}, "q_proj": {"bias": 2.917194128036499, "kernel": 64.23175048828125}, "v_proj": {"bias": 0.7484986782073975, "kernel": 60.96894073486328}}, "feed_forward": {"intermediate_dense": {"bias": 2.7722606658935547, "kernel": 117.14579772949219}, "output_dense": {"bias": 1.0733555555343628, "kernel": 116.0633773803711}}, "final_layer_norm": {"bias": 2.5812458992004395, "scale": 22.717479705810547}, "layer_norm": {"bias": 2.550534725189209, "scale": 22.94876480102539}}, "28": {"attention": {"k_proj": {"bias": 1.2572720050811768, "kernel": 64.16455078125}, "out_proj": {"bias": 1.6966627836227417, "kernel": 60.11882019042969}, "q_proj": {"bias": 3.1611275672912598, "kernel": 64.0079574584961}, "v_proj": {"bias": 0.664945125579834, "kernel": 60.57808303833008}}, "feed_forward": {"intermediate_dense": {"bias": 2.746213436126709, "kernel": 117.54061889648438}, "output_dense": {"bias": 0.9000833034515381, "kernel": 117.95083618164062}}, "final_layer_norm": {"bias": 2.5712246894836426, "scale": 23.1610050201416}, "layer_norm": {"bias": 2.155653953552246, "scale": 23.39959716796875}}, "29": {"attention": {"k_proj": {"bias": 1.2412364482879639, "kernel": 63.07917022705078}, "out_proj": {"bias": 1.6412043571472168, "kernel": 63.08409881591797}, "q_proj": {"bias": 3.1162118911743164, "kernel": 63.035369873046875}, "v_proj": {"bias": 0.6149334907531738, "kernel": 63.20928955078125}}, "feed_forward": {"intermediate_dense": {"bias": 2.7683238983154297, "kernel": 118.87762451171875}, "output_dense": {"bias": 1.1248301267623901, "kernel": 123.00424194335938}}, "final_layer_norm": {"bias": 2.8067712783813477, "scale": 24.53290367126465}, "layer_norm": {"bias": 2.2788681983947754, "scale": 24.371368408203125}}, "3": {"attention": {"k_proj": {"bias": 1.0682117938995361, "kernel": 58.82686996459961}, "out_proj": {"bias": 1.4460158348083496, "kernel": 53.96826171875}, "q_proj": {"bias": 3.1143360137939453, "kernel": 59.086524963378906}, "v_proj": {"bias": 0.38192105293273926, "kernel": 53.452857971191406}}, "feed_forward": {"intermediate_dense": {"bias": 2.5585412979125977, "kernel": 115.36900329589844}, "output_dense": {"bias": 0.7798702716827393, "kernel": 102.53439331054688}}, "final_layer_norm": {"bias": 1.6873037815093994, "scale": 22.31991195678711}, "layer_norm": {"bias": 1.8086010217666626, "scale": 21.418964385986328}}, "30": {"attention": {"k_proj": {"bias": 1.2559503316879272, "kernel": 63.75602722167969}, "out_proj": {"bias": 1.4681390523910522, "kernel": 59.37921905517578}, "q_proj": {"bias": 3.175278425216675, "kernel": 63.94244384765625}, "v_proj": {"bias": 0.6481107473373413, "kernel": 60.02722930908203}}, "feed_forward": {"intermediate_dense": {"bias": 2.740499973297119, "kernel": 119.35234069824219}, "output_dense": {"bias": 1.0934734344482422, "kernel": 122.9593734741211}}, "final_layer_norm": {"bias": 2.7396388053894043, "scale": 25.626995086669922}, "layer_norm": {"bias": 2.2996180057525635, "scale": 24.201406478881836}}, "31": {"attention": {"k_proj": {"bias": 1.2782238721847534, "kernel": 62.301849365234375}, "out_proj": {"bias": 1.4012553691864014, "kernel": 58.65263366699219}, "q_proj": {"bias": 2.939175605773926, "kernel": 62.559688568115234}, "v_proj": {"bias": 0.6553064584732056, "kernel": 59.218605041503906}}, "feed_forward": {"intermediate_dense": {"bias": 2.817246437072754, "kernel": 117.91593933105469}, "output_dense": {"bias": 1.2606602907180786, "kernel": 119.70211791992188}}, "final_layer_norm": {"bias": 2.5327694416046143, "scale": 25.40445327758789}, "layer_norm": {"bias": 2.305802583694458, "scale": 23.823705673217773}}, "32": {"attention": {"k_proj": {"bias": 1.3094589710235596, "kernel": 63.533992767333984}, "out_proj": {"bias": 1.3854526281356812, "kernel": 58.824134826660156}, "q_proj": {"bias": 3.114076614379883, "kernel": 63.627105712890625}, "v_proj": {"bias": 0.6093175411224365, "kernel": 59.80768585205078}}, "feed_forward": {"intermediate_dense": {"bias": 2.7801480293273926, "kernel": 117.20947265625}, "output_dense": {"bias": 1.324599027633667, "kernel": 119.54325866699219}}, "final_layer_norm": {"bias": 2.5435423851013184, "scale": 25.68441390991211}, "layer_norm": {"bias": 2.4235501289367676, "scale": 23.902536392211914}}, "33": {"attention": {"k_proj": {"bias": 1.3366351127624512, "kernel": 63.296287536621094}, "out_proj": {"bias": 1.430068850517273, "kernel": 58.4971809387207}, "q_proj": {"bias": 3.2261195182800293, "kernel": 63.553199768066406}, "v_proj": {"bias": 0.647566020488739, "kernel": 59.47398376464844}}, "feed_forward": {"intermediate_dense": {"bias": 2.7881383895874023, "kernel": 115.89324188232422}, "output_dense": {"bias": 1.3512067794799805, "kernel": 117.43673706054688}}, "final_layer_norm": {"bias": 2.453516721725464, "scale": 25.343490600585938}, "layer_norm": {"bias": 2.507319450378418, "scale": 23.96185302734375}}, "34": {"attention": {"k_proj": {"bias": 1.2941782474517822, "kernel": 62.228370666503906}, "out_proj": {"bias": 1.6845568418502808, "kernel": 58.70769119262695}, "q_proj": {"bias": 3.1820318698883057, "kernel": 62.562862396240234}, "v_proj": {"bias": 0.5970059633255005, "kernel": 59.69546127319336}}, "feed_forward": {"intermediate_dense": {"bias": 2.897738456726074, "kernel": 114.57638549804688}, "output_dense": {"bias": 1.2965514659881592, "kernel": 115.59634399414062}}, "final_layer_norm": {"bias": 2.396876573562622, "scale": 24.340225219726562}, "layer_norm": {"bias": 2.5630249977111816, "scale": 24.65895652770996}}, "35": {"attention": {"k_proj": {"bias": 1.4279742240905762, "kernel": 66.40350341796875}, "out_proj": {"bias": 1.591472864151001, "kernel": 57.53840637207031}, "q_proj": {"bias": 2.823566436767578, "kernel": 67.20199584960938}, "v_proj": {"bias": 0.5889960527420044, "kernel": 58.97966003417969}}, "feed_forward": {"intermediate_dense": {"bias": 3.0181026458740234, "kernel": 112.65335083007812}, "output_dense": {"bias": 1.1505603790283203, "kernel": 112.97894287109375}}, "final_layer_norm": {"bias": 2.441446304321289, "scale": 23.994781494140625}, "layer_norm": {"bias": 2.542743682861328, "scale": 25.000572204589844}}, "36": {"attention": {"k_proj": {"bias": 1.4029040336608887, "kernel": 63.1389045715332}, "out_proj": {"bias": 1.5916166305541992, "kernel": 57.22056198120117}, "q_proj": {"bias": 2.8235912322998047, "kernel": 63.60721969604492}, "v_proj": {"bias": 0.5017426013946533, "kernel": 59.000877380371094}}, "feed_forward": {"intermediate_dense": {"bias": 2.8778231143951416, "kernel": 110.61883544921875}, "output_dense": {"bias": 1.1144599914550781, "kernel": 111.57343292236328}}, "final_layer_norm": {"bias": 2.349557399749756, "scale": 24.3450927734375}, "layer_norm": {"bias": 2.4408788681030273, "scale": 24.49927520751953}}, "37": {"attention": {"k_proj": {"bias": 1.3937087059020996, "kernel": 60.52811813354492}, "out_proj": {"bias": 1.8286242485046387, "kernel": 56.00824737548828}, "q_proj": {"bias": 2.5970423221588135, "kernel": 60.78678894042969}, "v_proj": {"bias": 0.4639900326728821, "kernel": 57.38051223754883}}, "feed_forward": {"intermediate_dense": {"bias": 2.771303653717041, "kernel": 109.14910888671875}, "output_dense": {"bias": 1.1065362691879272, "kernel": 110.55940246582031}}, "final_layer_norm": {"bias": 2.10972261428833, "scale": 24.696584701538086}, "layer_norm": {"bias": 2.427600383758545, "scale": 24.660919189453125}}, "38": {"attention": {"k_proj": {"bias": 1.3824093341827393, "kernel": 58.65778350830078}, "out_proj": {"bias": 1.515190601348877, "kernel": 54.93376159667969}, "q_proj": {"bias": 2.484334945678711, "kernel": 58.929527282714844}, "v_proj": {"bias": 0.4655402898788452, "kernel": 56.142242431640625}}, "feed_forward": {"intermediate_dense": {"bias": 2.735954999923706, "kernel": 106.66136169433594}, "output_dense": {"bias": 1.0608540773391724, "kernel": 108.26023864746094}}, "final_layer_norm": {"bias": 2.145535945892334, "scale": 25.38375473022461}, "layer_norm": {"bias": 2.560453414916992, "scale": 25.668073654174805}}, "39": {"attention": {"k_proj": {"bias": 1.3524219989776611, "kernel": 57.31468200683594}, "out_proj": {"bias": 1.774656891822815, "kernel": 54.37468719482422}, "q_proj": {"bias": 2.257288932800293, "kernel": 57.76541519165039}, "v_proj": {"bias": 0.4829084873199463, "kernel": 55.80891418457031}}, "feed_forward": {"intermediate_dense": {"bias": 2.6967740058898926, "kernel": 103.42811584472656}, "output_dense": {"bias": 1.1364665031433105, "kernel": 107.01261901855469}}, "final_layer_norm": {"bias": 2.0256223678588867, "scale": 26.028522491455078}, "layer_norm": {"bias": 2.5359292030334473, "scale": 26.22036361694336}}, "4": {"attention": {"k_proj": {"bias": 1.055710792541504, "kernel": 61.09189987182617}, "out_proj": {"bias": 1.6698455810546875, "kernel": 55.757781982421875}, "q_proj": {"bias": 2.8421502113342285, "kernel": 61.419281005859375}, "v_proj": {"bias": 0.4117211103439331, "kernel": 55.36876678466797}}, "feed_forward": {"intermediate_dense": {"bias": 2.505772590637207, "kernel": 114.95476531982422}, "output_dense": {"bias": 0.9291812181472778, "kernel": 104.35224914550781}}, "final_layer_norm": {"bias": 1.8547258377075195, "scale": 21.94887924194336}, "layer_norm": {"bias": 1.9757022857666016, "scale": 22.878047943115234}}, "40": {"attention": {"k_proj": {"bias": 1.3348209857940674, "kernel": 55.37572479248047}, "out_proj": {"bias": 1.7106268405914307, "kernel": 51.78199768066406}, "q_proj": {"bias": 2.1897947788238525, "kernel": 56.08899688720703}, "v_proj": {"bias": 0.51689612865448, "kernel": 52.4681396484375}}, "feed_forward": {"intermediate_dense": {"bias": 2.542057514190674, "kernel": 100.45166015625}, "output_dense": {"bias": 1.158740758895874, "kernel": 103.93336486816406}}, "final_layer_norm": {"bias": 1.9524149894714355, "scale": 25.311817169189453}, "layer_norm": {"bias": 2.4308900833129883, "scale": 25.665002822875977}}, "41": {"attention": {"k_proj": {"bias": 2.1292548179626465, "kernel": 54.88892364501953}, "out_proj": {"bias": 1.4778457880020142, "kernel": 53.82907485961914}, "q_proj": {"bias": 1.8950426578521729, "kernel": 55.096229553222656}, "v_proj": {"bias": 0.5350444316864014, "kernel": 54.30274200439453}}, "feed_forward": {"intermediate_dense": {"bias": 2.6337058544158936, "kernel": 96.57034301757812}, "output_dense": {"bias": 1.195610761642456, "kernel": 101.90911865234375}}, "final_layer_norm": {"bias": 2.2974343299865723, "scale": 28.653759002685547}, "layer_norm": {"bias": 2.4208178520202637, "scale": 27.652393341064453}}, "42": {"attention": {"k_proj": {"bias": 1.4556612968444824, "kernel": 49.96910095214844}, "out_proj": {"bias": 1.5277200937271118, "kernel": 46.47171401977539}, "q_proj": {"bias": 1.772294521331787, "kernel": 50.624691009521484}, "v_proj": {"bias": 0.7890019416809082, "kernel": 45.6768798828125}}, "feed_forward": {"intermediate_dense": {"bias": 2.533141613006592, "kernel": 96.34883880615234}, "output_dense": {"bias": 1.2392287254333496, "kernel": 100.5073013305664}}, "final_layer_norm": {"bias": 2.0547590255737305, "scale": 29.897579193115234}, "layer_norm": {"bias": 1.867573618888855, "scale": 26.137269973754883}}, "43": {"attention": {"k_proj": {"bias": 1.6400136947631836, "kernel": 43.884910583496094}, "out_proj": {"bias": 1.4834134578704834, "kernel": 42.488441467285156}, "q_proj": {"bias": 1.5609513521194458, "kernel": 44.40974426269531}, "v_proj": {"bias": 0.6456707715988159, "kernel": 40.85655975341797}}, "feed_forward": {"intermediate_dense": {"bias": 2.4410853385925293, "kernel": 93.97550964355469}, "output_dense": {"bias": 0.971923828125, "kernel": 97.76197052001953}}, "final_layer_norm": {"bias": 2.2073206901550293, "scale": 32.08831024169922}, "layer_norm": {"bias": 1.9457297325134277, "scale": 24.571487426757812}}, "44": {"attention": {"k_proj": {"bias": 2.736769914627075, "kernel": 42.59820556640625}, "out_proj": {"bias": 1.2384448051452637, "kernel": 45.90458679199219}, "q_proj": {"bias": 1.5544309616088867, "kernel": 43.10401916503906}, "v_proj": {"bias": 0.4339163303375244, "kernel": 45.27587890625}}, "feed_forward": {"intermediate_dense": {"bias": 2.3976941108703613, "kernel": 92.50536346435547}, "output_dense": {"bias": 0.8718729019165039, "kernel": 95.40474700927734}}, "final_layer_norm": {"bias": 2.2857229709625244, "scale": 34.305206298828125}, "layer_norm": {"bias": 1.8322646617889404, "scale": 24.769798278808594}}, "45": {"attention": {"k_proj": {"bias": 2.3045191764831543, "kernel": 41.025367736816406}, "out_proj": {"bias": 1.1059207916259766, "kernel": 49.433837890625}, "q_proj": {"bias": 1.5799487829208374, "kernel": 41.23019790649414}, "v_proj": {"bias": 0.4706912934780121, "kernel": 49.692665100097656}}, "feed_forward": {"intermediate_dense": {"bias": 2.3830413818359375, "kernel": 89.87250518798828}, "output_dense": {"bias": 0.9723072052001953, "kernel": 91.52435302734375}}, "final_layer_norm": {"bias": 1.8866634368896484, "scale": 33.39029312133789}, "layer_norm": {"bias": 1.680398941040039, "scale": 23.549400329589844}}, "46": {"attention": {"k_proj": {"bias": 1.8261332511901855, "kernel": 40.715065002441406}, "out_proj": {"bias": 0.8854400515556335, "kernel": 51.898719787597656}, "q_proj": {"bias": 1.6973485946655273, "kernel": 41.74032211303711}, "v_proj": {"bias": 0.47354304790496826, "kernel": 52.74561309814453}}, "feed_forward": {"intermediate_dense": {"bias": 2.2698826789855957, "kernel": 85.94120025634766}, "output_dense": {"bias": 1.1624335050582886, "kernel": 83.30751037597656}}, "final_layer_norm": {"bias": 1.7028648853302002, "scale": 29.320556640625}, "layer_norm": {"bias": 1.506533145904541, "scale": 22.56432342529297}}, "47": {"attention": {"k_proj": {"bias": 1.2841383218765259, "kernel": 43.89749526977539}, "out_proj": {"bias": 0.7760035991668701, "kernel": 48.40948486328125}, "q_proj": {"bias": 1.8595157861709595, "kernel": 45.508384704589844}, "v_proj": {"bias": 0.7395519018173218, "kernel": 49.07368469238281}}, "feed_forward": {"intermediate_dense": {"bias": 2.2525475025177, "kernel": 82.39459228515625}, "output_dense": {"bias": 0.6880208849906921, "kernel": 77.71249389648438}}, "final_layer_norm": {"bias": 1.4860002994537354, "scale": 23.946605682373047}, "layer_norm": {"bias": 1.5497658252716064, "scale": 20.52420997619629}}, "5": {"attention": {"k_proj": {"bias": 1.0119032859802246, "kernel": 56.31935119628906}, "out_proj": {"bias": 1.6057957410812378, "kernel": 55.774261474609375}, "q_proj": {"bias": 3.061042308807373, "kernel": 56.34562683105469}, "v_proj": {"bias": 0.37706372141838074, "kernel": 55.92870330810547}}, "feed_forward": {"intermediate_dense": {"bias": 2.438316822052002, "kernel": 114.76457214355469}, "output_dense": {"bias": 0.9636346101760864, "kernel": 103.40338134765625}}, "final_layer_norm": {"bias": 1.989949345588684, "scale": 22.142147064208984}, "layer_norm": {"bias": 1.836284875869751, "scale": 20.958904266357422}}, "6": {"attention": {"k_proj": {"bias": 1.1155825853347778, "kernel": 59.055572509765625}, "out_proj": {"bias": 1.6090573072433472, "kernel": 56.853553771972656}, "q_proj": {"bias": 3.048717975616455, "kernel": 59.478973388671875}, "v_proj": {"bias": 0.410391241312027, "kernel": 56.41505813598633}}, "feed_forward": {"intermediate_dense": {"bias": 2.4068522453308105, "kernel": 114.2471694946289}, "output_dense": {"bias": 0.8525809049606323, "kernel": 103.95584106445312}}, "final_layer_norm": {"bias": 2.2757976055145264, "scale": 21.61276626586914}, "layer_norm": {"bias": 1.9055910110473633, "scale": 22.35369110107422}}, "7": {"attention": {"k_proj": {"bias": 0.9782332181930542, "kernel": 57.479583740234375}, "out_proj": {"bias": 1.4624639749526978, "kernel": 56.76895523071289}, "q_proj": {"bias": 2.7724175453186035, "kernel": 57.976966857910156}, "v_proj": {"bias": 0.46141791343688965, "kernel": 55.92349624633789}}, "feed_forward": {"intermediate_dense": {"bias": 2.4027278423309326, "kernel": 113.92277526855469}, "output_dense": {"bias": 0.6965302228927612, "kernel": 103.7275619506836}}, "final_layer_norm": {"bias": 2.259871244430542, "scale": 21.703615188598633}, "layer_norm": {"bias": 1.866142988204956, "scale": 21.641742706298828}}, "8": {"attention": {"k_proj": {"bias": 1.0691603422164917, "kernel": 57.60441970825195}, "out_proj": {"bias": 1.2814300060272217, "kernel": 57.156776428222656}, "q_proj": {"bias": 2.776516914367676, "kernel": 57.734466552734375}, "v_proj": {"bias": 0.434237003326416, "kernel": 56.514854431152344}}, "feed_forward": {"intermediate_dense": {"bias": 2.438565254211426, "kernel": 113.38650512695312}, "output_dense": {"bias": 0.6708989143371582, "kernel": 103.30772399902344}}, "final_layer_norm": {"bias": 2.1530230045318604, "scale": 21.4518985748291}, "layer_norm": {"bias": 1.8316454887390137, "scale": 21.361042022705078}}, "9": {"attention": {"k_proj": {"bias": 1.1614017486572266, "kernel": 59.272682189941406}, "out_proj": {"bias": 1.5716955661773682, "kernel": 59.24287414550781}, "q_proj": {"bias": 2.592710018157959, "kernel": 59.63836669921875}, "v_proj": {"bias": 0.5274032354354858, "kernel": 58.61853790283203}}, "feed_forward": {"intermediate_dense": {"bias": 2.498476982116699, "kernel": 111.90713500976562}, "output_dense": {"bias": 0.7959620952606201, "kernel": 103.15461730957031}}, "final_layer_norm": {"bias": 2.1939809322357178, "scale": 20.66909408569336}, "layer_norm": {"bias": 2.0584192276000977, "scale": 23.691593170166016}}}, "pos_conv_embed": {"conv": {"bias": 6.139623641967773, "weight_g": 9.604846954345703, "weight_v": 124.31410217285156}}}, "feature_extractor": {"conv_layers": {"0": {"conv": {"bias": 2.0290679931640625, "kernel": 20.55536460876465}, "layer_norm": {"bias": 4.550922393798828, "scale": 16.167570114135742}}, "1": {"conv": {"bias": 1.7790228128433228, "kernel": 51.24136734008789}, "layer_norm": {"bias": 5.962646961212158, "scale": 23.268157958984375}}, "2": {"conv": {"bias": 1.140576720237732, "kernel": 46.50312042236328}, "layer_norm": {"bias": 4.176670551300049, "scale": 20.370853424072266}}, "3": {"conv": {"bias": 0.6725863218307495, "kernel": 44.397525787353516}, "layer_norm": {"bias": 3.888174533843994, "scale": 17.53795051574707}}, "4": {"conv": {"bias": 0.6373162269592285, "kernel": 41.314056396484375}, "layer_norm": {"bias": 2.385471820831299, "scale": 16.34571647644043}}, "5": {"conv": {"bias": 0.5147221684455872, "kernel": 37.479759216308594}, "layer_norm": {"bias": 2.020900011062622, "scale": 17.064470291137695}}, "6": {"conv": {"bias": 0.4947893023490906, "kernel": 40.64780044555664}, "layer_norm": {"bias": 0.5876954793930054, "scale": 19.058603286743164}}}}, "feature_projection": {"layer_norm": {"bias": 6.456684589385986, "scale": 16.53719139099121}, "projection": {"bias": 2.147587776184082, "kernel": 43.111968994140625}}, "masked_spec_embed": 11.914372444152832}}, "train/learning_rate": 0.0003397299733478576, "train/loss": 0.27065393328666687, "train/param_norm": 1370.627197265625, "_runtime": 90372, "_timestamp": 1659383116, "_step": 31900, "eval/loss": 0.5839321613311768, "eval/wer": 0.42758277671575956, "eval/cer": 0.12089792986748961, "eval/step_4k": {"_type": "table-file", "path": "media/table/eval/step_4k_4000_af4cafd73c286841ef2f.table.json", "sha256": "af4cafd73c286841ef2fce257a64583667ab5412cd6837e4b951b2f851540450", "size": 24260, "artifact_path": "wandb-client-artifact://8wsujunwuradmkiy9teyal5atub99m9zxf1gks0x84p3y9bcbrdwjhad89ar5fxnyqtn8bopmk4501qsp1nuyvfeafw7p4spyxkt5zczfl5bxv88dzgxi32ukaj3dp4j:latest/eval/step_4k.table.json", "_latest_artifact_path": "wandb-client-artifact://8wsujunwuradmkiy9teyal5atub99m9zxf1gks0x84p3y9bcbrdwjhad89ar5fxnyqtn8bopmk4501qsp1nuyvfeafw7p4spyxkt5zczfl5bxv88dzgxi32ukaj3dp4j:latest/eval/step_4k.table.json", "ncols": 2, "nrows": 50}, "eval/step_8k": {"_type": "table-file", "path": "media/table/eval/step_8k_8000_c8ddc6e8e3a9e52ebbba.table.json", "sha256": "c8ddc6e8e3a9e52ebbbae9ac6ec8bb7ae6684781548fb4ea5c57a4b03a72d655", "size": 25822, "artifact_path": "wandb-client-artifact://och35iwsgdf5e7r6ebwp8bo4p13eowiyessoabvv4cf6keb0gk4e1577q2io23l2jh4jrzauz2qyodfw4w6u4eyf8llym88t1brov3snl0vrwcrq3dalvmazc40labfc:latest/eval/step_8k.table.json", "_latest_artifact_path": "wandb-client-artifact://och35iwsgdf5e7r6ebwp8bo4p13eowiyessoabvv4cf6keb0gk4e1577q2io23l2jh4jrzauz2qyodfw4w6u4eyf8llym88t1brov3snl0vrwcrq3dalvmazc40labfc:latest/eval/step_8k.table.json", "ncols": 2, "nrows": 50}, "eval/step_12k": {"_type": "table-file", "path": "media/table/eval/step_12k_12000_697630eb77c56222f807.table.json", "sha256": "697630eb77c56222f80728b3497df5ebfe62fb1dd060725ab84ec28fcf8448a3", "size": 25625, "artifact_path": "wandb-client-artifact://ndqwcshgeo7e3e4lndtrefve494sq9zmx9n9lrqdirtgy63uloydfb95oh1cytys0xi7ugpxbq1rub03y9scmcm41ocpbk826sbeejkgr3aubqet78b4jx4d8fb3z14k:latest/eval/step_12k.table.json", "_latest_artifact_path": "wandb-client-artifact://ndqwcshgeo7e3e4lndtrefve494sq9zmx9n9lrqdirtgy63uloydfb95oh1cytys0xi7ugpxbq1rub03y9scmcm41ocpbk826sbeejkgr3aubqet78b4jx4d8fb3z14k:latest/eval/step_12k.table.json", "ncols": 2, "nrows": 50}, "eval/step_16k": {"_type": "table-file", "path": "media/table/eval/step_16k_16000_a8af015baca8352e331a.table.json", "sha256": "a8af015baca8352e331a32965ddaa7fe22e2119a1c1256e539aedfd2cb876b87", "size": 25878, "artifact_path": "wandb-client-artifact://5lnl6ihmavrhu81b4ehn69ru2j2zbl0h27qv20im44v9928o4s69g9nvnb8oni6t0b921jwgbo24pz870kgos572o5h3vkwm71kr1brda3f3ooretb0u164vptzypekc:latest/eval/step_16k.table.json", "_latest_artifact_path": "wandb-client-artifact://5lnl6ihmavrhu81b4ehn69ru2j2zbl0h27qv20im44v9928o4s69g9nvnb8oni6t0b921jwgbo24pz870kgos572o5h3vkwm71kr1brda3f3ooretb0u164vptzypekc:latest/eval/step_16k.table.json", "ncols": 2, "nrows": 50}, "eval/step_20k": {"_type": "table-file", "path": "media/table/eval/step_20k_20000_37ce73b5cf7c7934cf62.table.json", "sha256": "37ce73b5cf7c7934cf62628174d0b0af065414de25e751b9e98983313d2b352e", "size": 25998, "artifact_path": "wandb-client-artifact://dhxussc15vwxa3x78tfa9270hif7fap7tu63y3eysgx549vfxso9k96434vhylx2uacjad6ldxuh7iavo5ogtn6fqtsv0u6d60zpvy5g99bbhwo8mmzkveldckon4ngf:latest/eval/step_20k.table.json", "_latest_artifact_path": "wandb-client-artifact://dhxussc15vwxa3x78tfa9270hif7fap7tu63y3eysgx549vfxso9k96434vhylx2uacjad6ldxuh7iavo5ogtn6fqtsv0u6d60zpvy5g99bbhwo8mmzkveldckon4ngf:latest/eval/step_20k.table.json", "ncols": 2, "nrows": 50}, "eval/step_24k": {"_type": "table-file", "path": "media/table/eval/step_24k_24000_6d0ed7e79108396fc292.table.json", "sha256": "6d0ed7e79108396fc292429957faec4cfead67d1cb5df1dfb6e4064ac1b8efd8", "size": 26106, "artifact_path": "wandb-client-artifact://vc70e9r1bqlbymt7rqzpvpurafx4zsrts0ic4qbqjmtia7a4qefxrwovecenb19dn3y65kbrjbmz63f1534kl7xoa0xst4f09yfus7kr0h0wn4i1hua9wqcf4hmyqgi6:latest/eval/step_24k.table.json", "_latest_artifact_path": "wandb-client-artifact://vc70e9r1bqlbymt7rqzpvpurafx4zsrts0ic4qbqjmtia7a4qefxrwovecenb19dn3y65kbrjbmz63f1534kl7xoa0xst4f09yfus7kr0h0wn4i1hua9wqcf4hmyqgi6:latest/eval/step_24k.table.json", "ncols": 2, "nrows": 50}, "eval/step_28k": {"_type": "table-file", "path": "media/table/eval/step_28k_28000_7186c63d506b9c841f41.table.json", "sha256": "7186c63d506b9c841f410c33dd5d77206b2d413f991c3b48e3b2b1265afbc518", "size": 26279, "artifact_path": "wandb-client-artifact://fdox4htembz1otwyfgt305vpupai0gi1hwwh6nh5zybkkex9v54fvigfcehdhyuj2kffoxiwqocy0n6gno40mw2grt1mc34m2kbnvn5z6b60nr5wh5uh1w8f9wmgicgs:latest/eval/step_28k.table.json", "_latest_artifact_path": "wandb-client-artifact://fdox4htembz1otwyfgt305vpupai0gi1hwwh6nh5zybkkex9v54fvigfcehdhyuj2kffoxiwqocy0n6gno40mw2grt1mc34m2kbnvn5z6b60nr5wh5uh1w8f9wmgicgs:latest/eval/step_28k.table.json", "ncols": 2, "nrows": 50}} \ No newline at end of file