- model.decoder.final_layernorm.bias
- model.decoder.final_layernorm.weight
- model.decoder.layers.mlp.linear_fc1._extra_state
- model.decoder.layers.mlp.linear_fc1.layer_norm_bias
- model.decoder.layers.mlp.linear_fc1.layer_norm_weight
- model.decoder.layers.mlp.linear_fc1.weight
- model.decoder.layers.mlp.linear_fc2._extra_state
- model.decoder.layers.mlp.linear_fc2.weight
- model.decoder.layers.self_attention.linear_proj._extra_state
- model.decoder.layers.self_attention.linear_proj.weight
- model.decoder.layers.self_attention.linear_qkv._extra_state
- model.decoder.layers.self_attention.linear_qkv.layer_norm_bias
- model.decoder.layers.self_attention.linear_qkv.layer_norm_weight
- model.decoder.layers.self_attention.linear_qkv.weight
- model.embedding.word_embeddings.weight
- model.output_layer.weight
-
860 Bytes
LFS
-
113 Bytes