stellaathena committed
Commit 2208576
1 Parent(s): 658edeb

Update to GPT2 style

Files changed (1)
  1. config.json +4 -16
config.json CHANGED
@@ -4,30 +4,18 @@
   "GPTJForCausalLM"
  ],
  "attention_dropout": 0,
- "attention_layers": ["global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global", "global"],
- "attention_types": [
-   [
-     [
-       "global"
-     ],
-     28
-   ]
- ],
  "bos_token_id": 50256,
  "embed_dropout": 0,
  "eos_token_id": 50256,
  "gradient_checkpointing": false,
- "hidden_size": 4096,
+ "n_embd": 4096,
  "initializer_range": 0.02,
  "intermediate_size": null,
- "jax": true,
  "layer_norm_epsilon": 1e-05,
- "max_position_embeddings": 2048,
+ "n_positions": 2048,
  "model_type": "gptj",
- "num_heads": 16,
- "num_layers": 28,
- "resid_dropout": 0,
- "rotary": true,
+ "n_head": 16,
+ "n_layer": 28,
  "rotary_dim": 64,
  "summary_activation": null,
  "summary_first_dropout": 0.1,