|
{ |
|
"metadata": { |
|
"total_size": 11399028736 |
|
}, |
|
"weight_map": { |
|
"decoder/block/0/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/0/SelfAttention/relative_attention_bias/embedding": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/0/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/1/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/10/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/11/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/12/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/13/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/14/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/15/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/16/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/17/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/18/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/19/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/19/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/19/layer/2/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/19/layer/2/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/2/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/2/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/20/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/1/EncDecAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/1/EncDecAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/1/EncDecAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/1/EncDecAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/2/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/20/layer/2/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/1/EncDecAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/1/EncDecAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/1/EncDecAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/1/EncDecAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/2/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/21/layer/2/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/1/EncDecAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/1/EncDecAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/1/EncDecAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/1/EncDecAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/2/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/22/layer/2/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/0/SelfAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/0/SelfAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/0/SelfAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/0/SelfAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/0/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/1/EncDecAttention/k/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/1/EncDecAttention/o/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/1/EncDecAttention/q/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/1/EncDecAttention/v/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/1/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/2/DenseReluDense/wo/kernel": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/23/layer/2/layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"decoder/block/3/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/3/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/4/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/5/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/6/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/7/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/8/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/1/EncDecAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/1/EncDecAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/1/EncDecAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/1/EncDecAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/2/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/2/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/2/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"decoder/block/9/layer/2/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"decoder/final_layer_norm/weight": "flax_model-00002-of-00002.msgpack", |
|
"encoder/block/0/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/0/SelfAttention/relative_attention_bias/embedding": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/0/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/1/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/10/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/11/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/12/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/13/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/14/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/15/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/16/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/17/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/18/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/19/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/2/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/20/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/21/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/22/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/23/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/3/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/4/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/5/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/6/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/7/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/8/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/0/SelfAttention/k/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/0/SelfAttention/o/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/0/SelfAttention/q/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/0/SelfAttention/v/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/0/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/1/DenseReluDense/wi_0/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/1/DenseReluDense/wi_1/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/1/DenseReluDense/wo/kernel": "flax_model-00001-of-00002.msgpack", |
|
"encoder/block/9/layer/1/layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"encoder/final_layer_norm/weight": "flax_model-00001-of-00002.msgpack", |
|
"lm_head/kernel": "flax_model-00002-of-00002.msgpack", |
|
"shared/embedding": "flax_model-00001-of-00002.msgpack" |
|
} |
|
} |
|
|