scalable_t5x_tiny_test / t5x /model-info.txt
Ahmed Elnaggar
upload tiny model
b144aaa
Variable decoder/decoder/encoder_decoder_attention/key/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable decoder/decoder/encoder_decoder_attention/out/kernel size 576 shape (heads=4, layers=3, kv=6, embed=8) partition spec ('model', None, None, None)
Variable decoder/decoder/encoder_decoder_attention/query/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable decoder/decoder/encoder_decoder_attention/value/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable decoder/decoder/mlp/wi_0/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model')
Variable decoder/decoder/mlp/wi_1/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model')
Variable decoder/decoder/mlp/wo/kernel size 384 shape (mlp=16, layers=3, embed=8) partition spec ('model', None, None)
Variable decoder/decoder/pre_cross_attention_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable decoder/decoder/pre_mlp_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable decoder/decoder/pre_self_attention_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable decoder/decoder/relpos_bias/rel_embedding size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None)
Variable decoder/decoder/self_attention/key/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable decoder/decoder/self_attention/out/kernel size 576 shape (heads=4, layers=3, kv=6, embed=8) partition spec ('model', None, None, None)
Variable decoder/decoder/self_attention/query/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable decoder/decoder/self_attention/value/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable decoder/decoder_norm/scale size 8 shape (embed=8) partition spec (None,)
Variable decoder/logits_dense/kernel size 2048 shape (embed=8, vocab=256) partition spec (None, 'model')
Variable encoder/encoder/attention/key/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable encoder/encoder/attention/out/kernel size 576 shape (heads=4, layers=3, kv=6, embed=8) partition spec ('model', None, None, None)
Variable encoder/encoder/attention/query/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable encoder/encoder/attention/value/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None)
Variable encoder/encoder/mlp/wi_0/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model')
Variable encoder/encoder/mlp/wi_1/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model')
Variable encoder/encoder/mlp/wo/kernel size 384 shape (mlp=16, layers=3, embed=8) partition spec ('model', None, None)
Variable encoder/encoder/pre_attention_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable encoder/encoder/pre_mlp_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable encoder/encoder/relpos_bias/rel_embedding size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None)
Variable encoder/encoder_norm/scale size 8 shape (embed=8) partition spec (None,)
Variable token_embedder/embedding size 2048 shape (vocab=256, embed=8) partition spec ('model', None)
Total number of parameters: 14984
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_col size 72 shape (4, 3, 6) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_row size 24 shape (3, 8) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_col size 48 shape (3, 16) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_col size 48 shape (3, 16) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v_col size 48 shape (16, 3) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v_row size 24 shape (3, 8) partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None)
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v_col size 72 shape (4, 3, 6) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v_row size 24 shape (3, 8) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/decoder/decoder_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder_norm/scale/v size 8 shape (embed=8) partition spec (None,)
Variable param_states/decoder/decoder_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/logits_dense/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/logits_dense/kernel/v size 2048 shape (embed=8, vocab=256) partition spec (None, 'model')
Variable param_states/decoder/logits_dense/kernel/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/logits_dense/kernel/v_row size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v_col size 72 shape (4, 3, 6) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v_row size 24 shape (3, 8) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v_col size 72 shape (3, 4, 6) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_col size 48 shape (3, 16) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_col size 48 shape (3, 16) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_row size 24 shape (8, 3) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v_col size 48 shape (16, 3) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v_row size 24 shape (3, 8) partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None)
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None)
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder_norm/scale/v size 8 shape (embed=8) partition spec (None,)
Variable param_states/encoder/encoder_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/token_embedder/embedding/m size 1 shape (1,) partition spec None
Variable param_states/token_embedder/embedding/v size 2048 shape (vocab=256, embed=8) partition spec ('model', None)
Variable param_states/token_embedder/embedding/v_col size 1 shape (1,) partition spec None
Variable param_states/token_embedder/embedding/v_row size 1 shape (1,) partition spec None
Variable step size 1 shape () partition spec None