{ "metadata": { "total_size": 6173219840 }, "weight_map": { "model/decoder/embed_positions/embedding": "flax_model-00001-of-00002.msgpack", "model/decoder/embed_tokens/embedding": "flax_model-00001-of-00002.msgpack", "model/decoder/layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/0/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/0/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/1/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/10/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/11/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/12/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/13/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/14/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/15/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/16/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/17/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/18/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/19/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/2/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/20/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/20/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/20/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/20/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/20/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/21/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/21/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/22/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/23/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/24/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/25/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/26/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/27/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/28/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/29/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/3/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/3/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/30/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/30/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/encoder_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/fc1/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/fc1/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/fc2/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/fc2/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/final_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/final_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn/k_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn/out_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn/out_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn/q_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn/q_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn/v_proj/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn/v_proj/kernel": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn_layer_norm/bias": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/31/self_attn_layer_norm/scale": "flax_model-00002-of-00002.msgpack", "model/decoder/layers/4/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/4/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/5/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/6/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/7/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/8/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/encoder_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/decoder/layers/9/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/conv1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/conv1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/conv2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/conv2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/embed_positions/embedding": "flax_model-00001-of-00002.msgpack", "model/encoder/layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/0/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/1/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/10/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/11/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/12/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/13/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/14/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/15/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/16/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/17/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/18/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/19/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/2/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/20/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/21/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/22/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/23/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/24/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/25/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/26/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/27/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/28/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/29/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/3/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/30/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/31/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/4/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/5/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/6/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/7/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/8/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/fc1/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/fc1/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/fc2/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/fc2/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/final_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/final_layer_norm/scale": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn/k_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn/out_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn/out_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn/q_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn/q_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn/v_proj/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn/v_proj/kernel": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn_layer_norm/bias": "flax_model-00001-of-00002.msgpack", "model/encoder/layers/9/self_attn_layer_norm/scale": "flax_model-00001-of-00002.msgpack" } }