simarora committed
Commit 5df808e
1 Parent(s): d7359c4

Update config.json

Files changed (1): config.json (+13, -2)
config.json CHANGED
```diff
@@ -1,28 +1,39 @@
-{
+config_check = {
   "n_embd": 1792,
   "n_inner": 3584,
   "n_head": 16,
   "n_layer": 36,
 
+  "mlp_fc1_bias": False,
+  "mlp_fc2_bias": False,
+  "out_proj_bias": False,
+  "qkv_proj_bias": False,
+  "reorder_and_upcast_attn": False,
+  "scale_attn_by_inverse_layer_idx": False,
+
   "activation_function": "swiglu",
   "resid_pdrop": 0.0,
+  "rms_norm": True,
   "residual_in_fp32": True,
   "pad_vocab_size_multiple": 8,
   "use_flash_attn": True,
   "special_initializer": True,
+  "rotary_emb_fraction": 1,
   "max_position_embeddings": 0,
 
   "alt_mixer_layers": [1, 6, 11, 16, 21, 27, 33],
-  "alt_2_mixer_layers": [2, 7, 12, 17, 22, 28, 34],
+  "alt_mixer_2_layers": [2, 7, 12, 17, 22, 28, 34],
   "mixer": {
     "_target_": "based.models.mixers.base_conv.BaseConvWithSiLU4",
     "expand_proj": 4,
     "l_max": 2048,
     "kernel_sizes": [3],
+    "use_bias": True,
   },
   "alt_mixer": {
     "_target_": "based.models.mixers.linear_attn.LinearAttention",
     "feature_dim": 16,
+    "feature_name": "taylor_exp",
     "l_max": 2048,
     "num_heads": 16,
     "num_key_value_heads": 16,
```