{
  "n_layers": 8,
  "d_model": 1024,
  "n_ctx": 1024,
  "d_head": 64,
  "model_name": "custom",
  "n_heads": 16,
  "d_mlp": 4096,
  "act_fn": "solu_ln",
  "d_vocab": 50278,
  "eps": 1e-05,
  "use_attn_result": false,
  "use_attn_scale": true,
  "use_local_attn": false,
  "model_family": null,
  "checkpoint": null,
  "tokenizer_name": "EleutherAI/gpt-neox-20b",
  "window_size": null,
  "attn_types": null,
  "init_mode": "gpt2",
  "normalization_type": "LNPre",
  "device": "cuda",
  "attention_dir": "causal",
  "attn_only": false,
  "seed": 42,
  "initializer_range": 0.025,
  "init_weights": true,
  "scale_attn_by_inverse_layer_idx": false,
  "positional_embedding_type": "standard",
  "final_rms": false,
  "d_vocab_out": 50278,
  "parallel_attn_mlp": false
}