{
    "rewrite_module_tmp": "transformer.h.{}.mlp.fc_in",
    "layer_module_tmp": "transformer.h.{}",
    "mlp_module_tmp": "transformer.h.{}.mlp",
    "proj_module_tmp": "transformer.h.{}.mlp.fc_out",
    "embedding_layer": "transformer.wte",
    "v_loss_layer": 27,

    "norm_learnables": {
        "norm_weight": "transformer.h.{}.ln_1.weight",
        "norm_bias": "transformer.h.{}.ln_1.bias"
    },
    "weights_to_modify": {
        "w1_weight": "transformer.h.{}.mlp.fc_in.weight",
        "w1_bias": "transformer.h.{}.mlp.fc_in.bias",
        "w2_weight": "transformer.h.{}.mlp.fc_out.weight",
        "w2_bias": "transformer.h.{}.mlp.fc_out.bias"
    },

    "activation": "gelu",
    "n_embd": 4096,
    "mlp_type": "type1",
    "model_name": "gpt-j-6b"
}
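
This config maps symbolic roles (rewrite target, projection, norm parameters) onto GPT-J-6B's module paths, with `{}` left as a placeholder for a transformer block index. The file itself does not show how it is consumed; the sketch below is a minimal, hypothetical usage example, assuming the placeholders are filled via `str.format` and that `model_name` corresponds to the `EleutherAI/gpt-j-6b` checkpoint on the Hugging Face Hub. The filename `gpt-j-6b.json` and the choice of layer are illustrative assumptions.

```python
import json

from transformers import AutoModelForCausalLM

# Hypothetical filename for this config file.
with open("gpt-j-6b.json") as f:
    cfg = json.load(f)

# Assumption: the short "model_name" maps to this Hub checkpoint.
model = AutoModelForCausalLM.from_pretrained("EleutherAI/gpt-j-6b")

layer = 17  # any block index in [0, 27]; GPT-J-6B has 28 layers

# Resolve a templated path to an actual nn.Module.
rewrite_module = model.get_submodule(cfg["rewrite_module_tmp"].format(layer))
print(rewrite_module)
# -> Linear(in_features=4096, out_features=16384, bias=True)

# Entries under "weights_to_modify" name parameters the same way.
params = dict(model.named_parameters())
for key, tmp in cfg["weights_to_modify"].items():
    print(key, tuple(params[tmp.format(layer)].shape))
```

Note that `rewrite_module_tmp` (`fc_in`) and `proj_module_tmp` (`fc_out`) point at the two halves of the block's MLP, while `norm_learnables` names the `ln_1` parameters; GPT-J blocks have a single LayerNorm because attention and MLP run in parallel, which is consistent with only `ln_1` appearing here.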