File size: 26,167 Bytes
b144aaa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
Variable decoder/decoder/encoder_decoder_attention/key/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable decoder/decoder/encoder_decoder_attention/out/kernel size 576 shape (heads=4, layers=3, kv=6, embed=8) partition spec ('model', None, None, None) Variable decoder/decoder/encoder_decoder_attention/query/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable decoder/decoder/encoder_decoder_attention/value/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable decoder/decoder/mlp/wi_0/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model') Variable decoder/decoder/mlp/wi_1/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model') Variable decoder/decoder/mlp/wo/kernel size 384 shape (mlp=16, layers=3, embed=8) partition spec ('model', None, None) Variable decoder/decoder/pre_cross_attention_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None) Variable decoder/decoder/pre_mlp_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None) Variable decoder/decoder/pre_self_attention_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None) Variable decoder/decoder/relpos_bias/rel_embedding size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None) Variable decoder/decoder/self_attention/key/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable decoder/decoder/self_attention/out/kernel size 576 shape (heads=4, layers=3, kv=6, embed=8) partition spec ('model', None, None, None) Variable decoder/decoder/self_attention/query/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable decoder/decoder/self_attention/value/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable decoder/decoder_norm/scale size 8 shape (embed=8) partition spec (None,) Variable decoder/logits_dense/kernel size 2048 shape (embed=8, vocab=256) partition spec (None, 'model') Variable encoder/encoder/attention/key/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable encoder/encoder/attention/out/kernel size 576 shape (heads=4, layers=3, kv=6, embed=8) partition spec ('model', None, None, None) Variable encoder/encoder/attention/query/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable encoder/encoder/attention/value/kernel size 576 shape (embed=8, layers=3, heads=4, kv=6) partition spec (None, None, 'model', None) Variable encoder/encoder/mlp/wi_0/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model') Variable encoder/encoder/mlp/wi_1/kernel size 384 shape (embed=8, layers=3, mlp=16) partition spec (None, None, 'model') Variable encoder/encoder/mlp/wo/kernel size 384 shape (mlp=16, layers=3, embed=8) partition spec ('model', None, None) Variable encoder/encoder/pre_attention_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None) Variable encoder/encoder/pre_mlp_layer_norm/scale size 24 shape (embed=8, layers=3) partition spec (None, None) Variable encoder/encoder/relpos_bias/rel_embedding size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None) Variable encoder/encoder_norm/scale size 8 shape (embed=8) partition spec (None,) Variable token_embedder/embedding size 2048 shape (vocab=256, embed=8) partition spec ('model', None) Total number of parameters: 14984 Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_col size 72 shape (4, 3, 6) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_row size 24 shape (3, 8) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_col size 48 shape (3, 16) partition spec None Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_col size 48 shape (3, 16) partition spec None Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/mlp/wo/kernel/v_col size 48 shape (16, 3) partition spec None Variable param_states/decoder/decoder/mlp/wo/kernel/v_row size 24 shape (3, 8) partition spec None Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None) Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None) Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None) Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None) Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/key/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/decoder/decoder/self_attention/key/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder/self_attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/out/kernel/v_col size 72 shape (4, 3, 6) partition spec None Variable param_states/decoder/decoder/self_attention/out/kernel/v_row size 24 shape (3, 8) partition spec None Variable param_states/decoder/decoder/self_attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/query/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/decoder/decoder/self_attention/query/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder/self_attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/decoder/decoder/self_attention/value/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/decoder/decoder/self_attention/value/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/decoder/decoder_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/decoder/decoder_norm/scale/v size 8 shape (embed=8) partition spec (None,) Variable param_states/decoder/decoder_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/decoder_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/decoder/logits_dense/kernel/m size 1 shape (1,) partition spec None Variable param_states/decoder/logits_dense/kernel/v size 2048 shape (embed=8, vocab=256) partition spec (None, 'model') Variable param_states/decoder/logits_dense/kernel/v_col size 1 shape (1,) partition spec None Variable param_states/decoder/logits_dense/kernel/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/key/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/key/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/key/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/encoder/encoder/attention/key/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/encoder/encoder/attention/out/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/out/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/out/kernel/v_col size 72 shape (4, 3, 6) partition spec None Variable param_states/encoder/encoder/attention/out/kernel/v_row size 24 shape (3, 8) partition spec None Variable param_states/encoder/encoder/attention/query/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/query/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/query/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/encoder/encoder/attention/query/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/encoder/encoder/attention/value/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/value/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/attention/value/kernel/v_col size 72 shape (3, 4, 6) partition spec None Variable param_states/encoder/encoder/attention/value/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/encoder/encoder/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_col size 48 shape (3, 16) partition spec None Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/encoder/encoder/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_col size 48 shape (3, 16) partition spec None Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_row size 24 shape (8, 3) partition spec None Variable param_states/encoder/encoder/mlp/wo/kernel/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/mlp/wo/kernel/v size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/mlp/wo/kernel/v_col size 48 shape (16, 3) partition spec None Variable param_states/encoder/encoder/mlp/wo/kernel/v_row size 24 shape (3, 8) partition spec None Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None) Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v size 24 shape (embed=8, layers=3) partition spec (None, None) Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v size 768 shape (heads=4, layers=3, relpos_buckets=64) partition spec ('model', None, None) Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None Variable param_states/encoder/encoder_norm/scale/m size 1 shape (1,) partition spec None Variable param_states/encoder/encoder_norm/scale/v size 8 shape (embed=8) partition spec (None,) Variable param_states/encoder/encoder_norm/scale/v_col size 1 shape (1,) partition spec None Variable param_states/encoder/encoder_norm/scale/v_row size 1 shape (1,) partition spec None Variable param_states/token_embedder/embedding/m size 1 shape (1,) partition spec None Variable param_states/token_embedder/embedding/v size 2048 shape (vocab=256, embed=8) partition spec ('model', None) Variable param_states/token_embedder/embedding/v_col size 1 shape (1,) partition spec None Variable param_states/token_embedder/embedding/v_row size 1 shape (1,) partition spec None Variable step size 1 shape () partition spec None |