{ "metadata": { "total_size": 44541329408 }, "weight_map": { "decoder/block/0/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/0/SelfAttention/relative_attention_bias/embedding": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/1/EncDecAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/1/EncDecAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/1/EncDecAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/1/EncDecAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "decoder/block/0/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/0/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/1/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/10/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/10/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/10/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/10/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/10/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/10/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/11/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/12/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/13/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/14/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/15/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/16/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/17/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/18/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/0/SelfAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/0/SelfAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/0/SelfAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/0/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/1/EncDecAttention/k/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/1/EncDecAttention/o/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/1/EncDecAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/1/EncDecAttention/v/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/1/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/2/DenseReluDense/wo/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/19/layer/2/layer_norm/weight": "flax_model-00004-of-00005.msgpack", "decoder/block/2/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/2/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/20/layer/0/SelfAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/0/SelfAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/0/SelfAttention/q/kernel": "flax_model-00004-of-00005.msgpack", "decoder/block/20/layer/0/SelfAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/0/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/1/EncDecAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/1/EncDecAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/1/EncDecAttention/q/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/1/EncDecAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/1/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/2/DenseReluDense/wo/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/20/layer/2/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/0/SelfAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/0/SelfAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/0/SelfAttention/q/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/0/SelfAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/0/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/1/EncDecAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/1/EncDecAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/1/EncDecAttention/q/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/1/EncDecAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/1/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/2/DenseReluDense/wo/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/21/layer/2/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/0/SelfAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/0/SelfAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/0/SelfAttention/q/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/0/SelfAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/0/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/1/EncDecAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/1/EncDecAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/1/EncDecAttention/q/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/1/EncDecAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/1/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/2/DenseReluDense/wo/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/22/layer/2/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/0/SelfAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/0/SelfAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/0/SelfAttention/q/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/0/SelfAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/0/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/1/EncDecAttention/k/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/1/EncDecAttention/o/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/1/EncDecAttention/q/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/1/EncDecAttention/v/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/1/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/2/DenseReluDense/wo/kernel": "flax_model-00005-of-00005.msgpack", "decoder/block/23/layer/2/layer_norm/weight": "flax_model-00005-of-00005.msgpack", "decoder/block/3/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/3/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/4/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/5/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/6/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/7/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/8/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/0/SelfAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/0/SelfAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/0/SelfAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/0/SelfAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/0/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/1/EncDecAttention/k/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/1/EncDecAttention/o/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/1/EncDecAttention/q/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/1/EncDecAttention/v/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/1/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/2/DenseReluDense/wo/kernel": "flax_model-00003-of-00005.msgpack", "decoder/block/9/layer/2/layer_norm/weight": "flax_model-00003-of-00005.msgpack", "decoder/final_layer_norm/weight": "flax_model-00005-of-00005.msgpack", "encoder/block/0/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/0/SelfAttention/relative_attention_bias/embedding": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/0/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/1/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/10/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/11/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/12/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/12/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/12/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/12/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/12/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/12/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/12/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/12/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/12/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/13/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/14/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/15/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/16/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/17/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/18/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/19/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/2/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/2/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/20/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/20/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/21/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/22/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/0/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/1/DenseReluDense/wo/kernel": "flax_model-00002-of-00005.msgpack", "encoder/block/23/layer/1/layer_norm/weight": "flax_model-00002-of-00005.msgpack", "encoder/block/3/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/3/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/4/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/5/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/6/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/7/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/8/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/0/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00005.msgpack", "encoder/block/9/layer/1/layer_norm/weight": "flax_model-00001-of-00005.msgpack", "encoder/final_layer_norm/weight": "flax_model-00002-of-00005.msgpack", "lm_head/kernel": "flax_model-00005-of-00005.msgpack", "shared/embedding": "flax_model-00001-of-00005.msgpack" } }