mlfoundations
/

dclm-7b-it

Inference Endpoints

Model card Files Files and versions Community

Update config.json

#2

by jmercat - opened Aug 6

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

Files changed (1) hide show

config.json +25 -27

config.json CHANGED Viewed

@@ -4,33 +4,31 @@
   ],
   "model_type": "openlm",
   "params": null,
-  "params_args_dict": {
-    "apply_qk_norm": true,
-    "attn_activation": null,
-    "attn_name": "auto",
-    "attn_seq_scalar": null,
-    "attn_seq_scalar_alpha": null,
-    "dim": 4096,
-    "ffn_type": "swiglu",
-    "model": "open_lm_7b",
-    "model_norm": "gain_only_lp_layer_norm",
-    "moe_capacity_factor": 1.25,
-    "moe_expert_model_parallelism": false,
-    "moe_freq": 0,
-    "moe_loss_weight": 0.1,
-    "moe_num_experts": null,
-    "moe_top_k": 2,
-    "moe_weight_parallelism": false,
-    "n_heads": 32,
-    "n_layers": 32,
-    "norm_eps": 1e-05,
-    "positional_embedding_type": "rotary",
-    "post_embed_norm": false,
-    "qk_norm": true,
-    "seq_len": 2048,
-    "vocab_size": 50432,
-    "weight_tying": false
-  },
   "torch_dtype": "float32",
   "transformers_version": "4.41.2"
 }

   ],
   "model_type": "openlm",
   "params": null,
+  "apply_qk_norm": true,
+  "attn_activation": null,
+  "attn_name": "auto",
+  "attn_seq_scalar": null,
+  "attn_seq_scalar_alpha": null,
+  "dim": 4096,
+  "ffn_type": "swiglu",
+  "model": "open_lm_7b",
+  "model_norm": "gain_only_lp_layer_norm",
+  "moe_capacity_factor": 1.25,
+  "moe_expert_model_parallelism": false,
+  "moe_freq": 0,
+  "moe_loss_weight": 0.1,
+  "moe_num_experts": null,
+  "moe_top_k": 2,
+  "moe_weight_parallelism": false,
+  "n_heads": 32,
+  "n_layers": 32,
+  "norm_eps": 1e-05,
+  "positional_embedding_type": "rotary",
+  "post_embed_norm": false,
+  "qk_norm": true,
+  "seq_len": 2048,
+  "vocab_size": 50432,
+  "weight_tying": false,
   "torch_dtype": "float32",
   "transformers_version": "4.41.2"
 }