redwood-attn-only-2l / config.json
NeelNanda's picture
Update config.json
cff40bd
{
  "n_layers": 2,
  "d_model": 256,
  "n_ctx": 2048,
  "d_head": 32,
  "model_name": "custom",
  "n_heads": 8,
  "d_mlp": null,
  "act_fn": null,
  "d_vocab": 50259,
  "eps": 1e-05,
  "use_attn_result": true,
  "use_attn_scale": true,
  "use_split_qkv_input": false,
  "use_local_attn": false,
  "original_architecture": null,
  "from_checkpoint": false,
  "checkpoint_index": null,
  "checkpoint_label_type": null,
  "checkpoint_value": null,
  "tokenizer_name": "gpt2",
  "window_size": null,
  "attn_types": null,
  "init_mode": "gpt2",
  "normalization_type": "LN",
  "device": "cuda",
  "attention_dir": "causal",
  "attn_only": true,
  "seed": null,
  "initializer_range": 0.05,
  "init_weights": true,
  "scale_attn_by_inverse_layer_idx": false,
  "positional_embedding_type": "shortformer",
  "final_rms": false,
  "d_vocab_out": 50259,
  "parallel_attn_mlp": false,
  "rotary_dim": null,
  "n_params": 524288,
  "use_hook_tokens": false
}