File size: 26,167 Bytes
b144aaa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
Variable decoder/decoder/encoder_decoder_attention/key/kernel                             size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable decoder/decoder/encoder_decoder_attention/out/kernel                             size 576          shape (heads=4, layers=3, kv=6, embed=8)       partition spec ('model', None, None, None)
Variable decoder/decoder/encoder_decoder_attention/query/kernel                           size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable decoder/decoder/encoder_decoder_attention/value/kernel                           size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable decoder/decoder/mlp/wi_0/kernel                                                  size 384          shape (embed=8, layers=3, mlp=16)              partition spec (None, None, 'model')
Variable decoder/decoder/mlp/wi_1/kernel                                                  size 384          shape (embed=8, layers=3, mlp=16)              partition spec (None, None, 'model')
Variable decoder/decoder/mlp/wo/kernel                                                    size 384          shape (mlp=16, layers=3, embed=8)              partition spec ('model', None, None)
Variable decoder/decoder/pre_cross_attention_layer_norm/scale                             size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable decoder/decoder/pre_mlp_layer_norm/scale                                         size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable decoder/decoder/pre_self_attention_layer_norm/scale                              size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable decoder/decoder/relpos_bias/rel_embedding                                        size 768          shape (heads=4, layers=3, relpos_buckets=64)   partition spec ('model', None, None)
Variable decoder/decoder/self_attention/key/kernel                                        size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable decoder/decoder/self_attention/out/kernel                                        size 576          shape (heads=4, layers=3, kv=6, embed=8)       partition spec ('model', None, None, None)
Variable decoder/decoder/self_attention/query/kernel                                      size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable decoder/decoder/self_attention/value/kernel                                      size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable decoder/decoder_norm/scale                                                       size 8            shape (embed=8)                                partition spec (None,)
Variable decoder/logits_dense/kernel                                                      size 2048         shape (embed=8, vocab=256)                     partition spec (None, 'model')
Variable encoder/encoder/attention/key/kernel                                             size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable encoder/encoder/attention/out/kernel                                             size 576          shape (heads=4, layers=3, kv=6, embed=8)       partition spec ('model', None, None, None)
Variable encoder/encoder/attention/query/kernel                                           size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable encoder/encoder/attention/value/kernel                                           size 576          shape (embed=8, layers=3, heads=4, kv=6)       partition spec (None, None, 'model', None)
Variable encoder/encoder/mlp/wi_0/kernel                                                  size 384          shape (embed=8, layers=3, mlp=16)              partition spec (None, None, 'model')
Variable encoder/encoder/mlp/wi_1/kernel                                                  size 384          shape (embed=8, layers=3, mlp=16)              partition spec (None, None, 'model')
Variable encoder/encoder/mlp/wo/kernel                                                    size 384          shape (mlp=16, layers=3, embed=8)              partition spec ('model', None, None)
Variable encoder/encoder/pre_attention_layer_norm/scale                                   size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable encoder/encoder/pre_mlp_layer_norm/scale                                         size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable encoder/encoder/relpos_bias/rel_embedding                                        size 768          shape (heads=4, layers=3, relpos_buckets=64)   partition spec ('model', None, None)
Variable encoder/encoder_norm/scale                                                       size 8            shape (embed=8)                                partition spec (None,)
Variable token_embedder/embedding                                                         size 2048         shape (vocab=256, embed=8)                     partition spec ('model', None)
Total number of parameters: 14984

Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_col          size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_row          size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_col          size 72           shape (4, 3, 6)                                partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_row          size 24           shape (3, 8)                                   partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/m            size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v            size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_col        size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_row        size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/m            size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v            size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_col        size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_row        size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_col                               size 48           shape (3, 16)                                  partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_row                               size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_col                               size 48           shape (3, 16)                                  partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_row                               size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/m                                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v                                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v_col                                 size 48           shape (16, 3)                                  partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v_row                                 size 24           shape (3, 8)                                   partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v              size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_col          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_row          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v                          size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_col                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_row                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/m               size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v               size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_col           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_row           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v                         size 768          shape (heads=4, layers=3, relpos_buckets=64)   partition spec ('model', None, None)
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v_col                     size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v_row                     size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v_col                     size 72           shape (4, 3, 6)                                partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v_row                     size 24           shape (3, 8)                                   partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/m                       size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v                       size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v_col                   size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v_row                   size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/m                       size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v                       size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v_col                   size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v_row                   size 24           shape (8, 3)                                   partition spec None
Variable param_states/decoder/decoder_norm/scale/m                                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder_norm/scale/v                                        size 8            shape (embed=8)                                partition spec (None,)
Variable param_states/decoder/decoder_norm/scale/v_col                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder_norm/scale/v_row                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/logits_dense/kernel/m                                       size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/logits_dense/kernel/v                                       size 2048         shape (embed=8, vocab=256)                     partition spec (None, 'model')
Variable param_states/decoder/logits_dense/kernel/v_col                                   size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/logits_dense/kernel/v_row                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/m                              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v                              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v_col                          size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v_row                          size 24           shape (8, 3)                                   partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/m                              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v                              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v_col                          size 72           shape (4, 3, 6)                                partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v_row                          size 24           shape (3, 8)                                   partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v_col                        size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v_row                        size 24           shape (8, 3)                                   partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v_col                        size 72           shape (3, 4, 6)                                partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v_row                        size 24           shape (8, 3)                                   partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_col                               size 48           shape (3, 16)                                  partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_row                               size 24           shape (8, 3)                                   partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_col                               size 48           shape (3, 16)                                  partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_row                               size 24           shape (8, 3)                                   partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/m                                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v                                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v_col                                 size 48           shape (16, 3)                                  partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v_row                                 size 24           shape (3, 8)                                   partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/m                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v                    size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_col                size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_row                size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v                          size 24           shape (embed=8, layers=3)                      partition spec (None, None)
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_col                      size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_row                      size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v                         size 768          shape (heads=4, layers=3, relpos_buckets=64)   partition spec ('model', None, None)
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder_norm/scale/m                                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder_norm/scale/v                                        size 8            shape (embed=8)                                partition spec (None,)
Variable param_states/encoder/encoder_norm/scale/v_col                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder_norm/scale/v_row                                    size 1            shape (1,)                                     partition spec None
Variable param_states/token_embedder/embedding/m                                          size 1            shape (1,)                                     partition spec None
Variable param_states/token_embedder/embedding/v                                          size 2048         shape (vocab=256, embed=8)                     partition spec ('model', None)
Variable param_states/token_embedder/embedding/v_col                                      size 1            shape (1,)                                     partition spec None
Variable param_states/token_embedder/embedding/v_row                                      size 1            shape (1,)                                     partition spec None
Variable step                                                                             size 1            shape ()                                       partition spec None