Variable decoder/decoder_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_0/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_0/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_0/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_0/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_0/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_0/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_0/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_0/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_0/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_0/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_0/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_0/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_0/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_0/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_1/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_1/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_1/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_1/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_1/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_1/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_1/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_1/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_1/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_1/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_1/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_1/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_1/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_1/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_10/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_10/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_10/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_10/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_10/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_10/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_10/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_10/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_10/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_10/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_10/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_10/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_10/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_10/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_11/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_11/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_11/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_11/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_11/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_11/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_11/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_11/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_11/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_11/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_11/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_11/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_11/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_11/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_12/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_12/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_12/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_12/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_12/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_12/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_12/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_12/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_12/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_12/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_12/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_12/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_12/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_12/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_13/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_13/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_13/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_13/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_13/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_13/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_13/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_13/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_13/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_13/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_13/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_13/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_13/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_13/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_14/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_14/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_14/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_14/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_14/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_14/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_14/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_14/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_14/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_14/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_14/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_14/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_14/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_14/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_15/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_15/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_15/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_15/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_15/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_15/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_15/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_15/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_15/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_15/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_15/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_15/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_15/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_15/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_16/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_16/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_16/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_16/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_16/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_16/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_16/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_16/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_16/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_16/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_16/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_16/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_16/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_16/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_17/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_17/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_17/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_17/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_17/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_17/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_17/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_17/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_17/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_17/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_17/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_17/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_17/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_17/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_18/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_18/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_18/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_18/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_18/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_18/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_18/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_18/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_18/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_18/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_18/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_18/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_18/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_18/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_19/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_19/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_19/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_19/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_19/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_19/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_19/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_19/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_19/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_19/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_19/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_19/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_19/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_19/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_2/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_2/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_2/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_2/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_2/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_2/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_2/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_2/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_2/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_2/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_2/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_2/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_2/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_2/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_20/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_20/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_20/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_20/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_20/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_20/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_20/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_20/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_20/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_20/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_20/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_20/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_20/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_20/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_21/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_21/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_21/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_21/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_21/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_21/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_21/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_21/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_21/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_21/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_21/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_21/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_21/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_21/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_22/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_22/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_22/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_22/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_22/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_22/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_22/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_22/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_22/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_22/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_22/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_22/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_22/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_22/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_23/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_23/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_23/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_23/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_23/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_23/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_23/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_23/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_23/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_23/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_23/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_23/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_23/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_23/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_3/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_3/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_3/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_3/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_3/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_3/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_3/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_3/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_3/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_3/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_3/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_3/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_3/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_3/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_4/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_4/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_4/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_4/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_4/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_4/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_4/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_4/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_4/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_4/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_4/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_4/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_4/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_4/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_5/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_5/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_5/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_5/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_5/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_5/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_5/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_5/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_5/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_5/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_5/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_5/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_5/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_5/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_6/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_6/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_6/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_6/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_6/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_6/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_6/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_6/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_6/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_6/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_6/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_6/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_6/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_6/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_7/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_7/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_7/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_7/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_7/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_7/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_7/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_7/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_7/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_7/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_7/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_7/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_7/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_7/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_8/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_8/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_8/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_8/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_8/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_8/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_8/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_8/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_8/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_8/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_8/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_8/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_8/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_8/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_9/encoder_decoder_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_9/encoder_decoder_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_9/encoder_decoder_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_9/encoder_decoder_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_9/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_9/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable decoder/layers_9/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable decoder/layers_9/pre_cross_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_9/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_9/pre_self_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable decoder/layers_9/self_attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_9/self_attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable decoder/layers_9/self_attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/layers_9/self_attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable decoder/logits_dense/kernel size 256114688 shape (embed=1024, vocab=250112) partition spec (None, 'model') Variable decoder/relpos_bias/rel_embedding size 512 shape (heads=16, relpos_buckets=32) partition spec ('model', None) Variable encoder/encoder_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_0/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_0/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_0/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_0/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_0/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_0/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_0/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_0/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_0/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_1/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_1/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_1/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_1/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_1/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_1/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_1/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_1/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_1/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_10/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_10/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_10/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_10/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_10/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_10/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_10/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_10/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_10/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_11/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_11/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_11/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_11/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_11/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_11/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_11/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_11/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_11/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_12/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_12/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_12/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_12/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_12/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_12/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_12/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_12/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_12/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_13/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_13/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_13/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_13/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_13/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_13/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_13/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_13/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_13/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_14/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_14/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_14/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_14/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_14/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_14/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_14/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_14/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_14/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_15/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_15/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_15/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_15/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_15/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_15/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_15/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_15/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_15/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_16/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_16/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_16/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_16/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_16/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_16/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_16/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_16/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_16/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_17/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_17/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_17/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_17/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_17/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_17/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_17/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_17/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_17/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_18/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_18/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_18/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_18/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_18/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_18/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_18/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_18/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_18/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_19/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_19/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_19/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_19/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_19/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_19/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_19/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_19/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_19/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_2/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_2/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_2/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_2/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_2/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_2/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_2/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_2/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_2/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_20/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_20/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_20/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_20/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_20/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_20/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_20/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_20/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_20/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_21/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_21/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_21/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_21/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_21/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_21/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_21/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_21/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_21/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_22/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_22/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_22/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_22/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_22/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_22/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_22/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_22/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_22/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_23/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_23/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_23/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_23/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_23/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_23/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_23/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_23/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_23/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_3/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_3/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_3/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_3/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_3/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_3/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_3/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_3/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_3/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_4/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_4/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_4/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_4/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_4/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_4/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_4/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_4/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_4/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_5/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_5/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_5/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_5/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_5/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_5/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_5/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_5/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_5/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_6/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_6/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_6/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_6/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_6/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_6/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_6/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_6/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_6/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_7/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_7/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_7/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_7/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_7/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_7/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_7/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_7/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_7/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_8/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_8/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_8/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_8/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_8/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_8/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_8/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_8/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_8/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_9/attention/key/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_9/attention/out/kernel size 1048576 shape (joined_kv=1024, embed=1024) partition spec ('model', None) Variable encoder/layers_9/attention/query/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_9/attention/value/kernel size 1048576 shape (embed=1024, joined_kv=1024) partition spec (None, 'model') Variable encoder/layers_9/mlp/wi_0/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_9/mlp/wi_1/kernel size 2883584 shape (embed=1024, mlp=2816) partition spec (None, 'model') Variable encoder/layers_9/mlp/wo/kernel size 2883584 shape (mlp=2816, embed=1024) partition spec ('model', None) Variable encoder/layers_9/pre_attention_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/layers_9/pre_mlp_layer_norm/scale size 1024 shape (embed=1024) partition spec (None,) Variable encoder/relpos_bias/rel_embedding size 512 shape (heads=16, relpos_buckets=32) partition spec ('model', None) Variable token_embedder/embedding size 256114688 shape (vocab=250112, embed=1024) partition spec ('model', None) Total number of parameters: 1229581312 Variable param_states/decoder/decoder_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/decoder_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/decoder_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_0/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_0/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_0/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_1/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_1/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_1/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_10/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_10/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_10/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_10/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_10/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_10/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_10/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_11/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_11/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_11/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_11/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_11/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_11/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_11/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_12/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_12/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_12/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_12/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_12/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_12/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_12/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_13/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_13/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_13/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_13/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_13/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_13/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_13/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_14/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_14/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_14/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_14/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_14/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_14/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_14/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_15/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_15/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_15/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_15/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_15/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_15/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_15/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_16/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_16/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_16/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_16/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_16/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_16/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_16/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_17/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_17/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_17/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_17/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_17/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_17/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_17/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_18/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_18/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_18/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_18/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_18/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_18/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_18/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_19/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_19/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_19/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_19/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_19/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_19/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_19/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_2/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_2/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_2/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_20/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_20/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_20/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_20/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_20/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_20/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_20/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_21/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_21/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_21/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_21/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_21/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_21/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_21/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_22/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_22/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_22/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_22/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_22/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_22/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_22/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_23/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_23/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_23/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_23/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_23/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_23/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_23/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_3/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_3/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_3/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_4/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_4/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_4/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_5/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_5/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_5/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_6/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_6/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_6/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_6/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_6/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_6/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_6/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_7/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_7/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_7/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_7/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_7/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_7/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_7/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_8/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_8/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_8/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_8/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_8/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_8/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_8/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/encoder_decoder_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_9/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/decoder/layers_9/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_cross_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_9/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_9/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_self_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/decoder/layers_9/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/self_attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/self_attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/self_attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/layers_9/self_attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/decoder/layers_9/self_attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/logits_dense/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/logits_dense/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/logits_dense/kernel/v_col size 250112 shape (250112,) partition spec None Variable param_states/decoder/logits_dense/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/decoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None Variable param_states/decoder/relpos_bias/rel_embedding/v size 512 shape (heads=16, relpos_buckets=32) partition spec ('model', None) Variable param_states/decoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/encoder_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/encoder_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/encoder_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_0/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_1/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_10/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_11/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_12/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_13/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_14/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_15/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_16/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_17/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_18/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_18/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_18/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_18/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_18/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_18/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_19/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_19/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_19/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_19/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_19/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_19/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_2/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_20/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_20/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_20/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_20/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_20/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_20/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_21/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_21/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_21/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_21/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_21/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_21/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_22/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_22/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_22/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_22/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_22/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_22/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_23/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_23/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_23/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_23/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_23/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_23/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_3/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_4/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_5/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_6/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_7/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_8/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/key/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/attention/key/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/out/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/attention/out/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/query/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/attention/query/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/attention/value/kernel/v_col size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/attention/value/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/mlp/wo/kernel/v_col size 2816 shape (2816,) partition spec None Variable param_states/encoder/layers_9/mlp/wo/kernel/v_row size 1024 shape (1024,) partition spec None Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v size 1024 shape (embed=1024) partition spec (None,) Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None Variable param_states/encoder/relpos_bias/rel_embedding/v size 512 shape (heads=16, relpos_buckets=32) partition spec ('model', None) Variable param_states/encoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None Variable param_states/token_embedder/embedding/m size 1 shape (1,) partition spec None Variable param_states/token_embedder/embedding/v size 1 shape (1,) partition spec None Variable param_states/token_embedder/embedding/v_col size 250112 shape (250112,) partition spec None Variable param_states/token_embedder/embedding/v_row size 1024 shape (1024,) partition spec None Variable step size 1 shape () partition spec None