iamkoder001 committed
Commit 0be5b58 · verified · 1 Parent(s): 69386ea

Update config.json

Files changed (1):
  config.json +3 -2
config.json CHANGED
@@ -20,6 +20,7 @@
     "kv_lora_rank": 512,
     "max_position_embeddings": 163840,
     "model_type": "deepseek_v2",
+    "_name_or_path": "SarvaCode-16B-Indigenous",
     "moe_intermediate_size": 1408,
     "moe_layer_freq": 1,
     "n_group": 1,
@@ -27,7 +28,7 @@
     "n_shared_experts": 2,
     "norm_topk_prob": false,
     "num_attention_heads": 16,
-    "num_experts_per_tok": 6,
+    "num_experts_per_tok": 8,
     "num_hidden_layers": 27,
     "num_key_value_heads": 16,
     "pretraining_tp": 1,
@@ -45,7 +46,7 @@
       "type": "yarn"
     },
     "rope_theta": 10000,
-    "routed_scaling_factor": 1.0,
+    "routed_scaling_factor": 1.5,
     "scoring_func": "softmax",
     "seq_aux": true,
     "tie_word_embeddings": false,