Undi95 TheBloke committed on
Commit
ce4a4e4
1 Parent(s): c98df05

Model config.json has Mistral params instead of Mixtral, breaking ExLlama quants and maybe affecting others too (#3)

Browse files

- Model config.json has Mistral params instead of Mixtral, breaking ExLlama quants and maybe affecting others too (ab09f2486d8ef6a128d4f21585713f169df0cbab)


Co-authored-by: Tom Jobbins <TheBloke@users.noreply.huggingface.co>

Files changed (1)
  1. config.json +4 -4
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_name_or_path": "jondurbin/bagel-7b-v0.1",
3
  "architectures": [
4
- "MistralForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
@@ -19,12 +19,12 @@
19
  "num_local_experts": 8,
20
  "output_router_logits": false,
21
  "rms_norm_eps": 1e-05,
22
- "rope_theta": 10000.0,
23
- "router_aux_loss_coef": 0.001,
24
  "sliding_window": 4096,
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "bfloat16",
27
  "transformers_version": "4.37.0.dev0",
28
- "use_cache": false,
29
  "vocab_size": 32000
30
  }
 
1
  {
2
  "_name_or_path": "jondurbin/bagel-7b-v0.1",
3
  "architectures": [
4
+ "MixtralForCausalLM"
5
  ],
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 1,
 
19
  "num_local_experts": 8,
20
  "output_router_logits": false,
21
  "rms_norm_eps": 1e-05,
22
+ "rope_theta": 1000000.0,
23
+ "router_aux_loss_coef": 0.02,
24
  "sliding_window": 4096,
25
  "tie_word_embeddings": false,
26
  "torch_dtype": "bfloat16",
27
  "transformers_version": "4.37.0.dev0",
28
+ "use_cache": true,
29
  "vocab_size": 32000
30
  }