nisten committed on
Commit
d57487f
1 Parent(s): 4236770

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +13 -53
config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "_name_or_path": "nisten/lobotollama3",
3
  "architectures": [
4
- "LlamaForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
@@ -11,64 +11,24 @@
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
13
  "intermediate_size": 14336,
14
- "max_position_embeddings": 8192,
15
- "model_type": "llama",
16
  "num_attention_heads": 32,
17
- "num_experts": 2,
18
  "num_experts_per_tok": 2,
19
  "num_hidden_layers": 32,
20
  "num_key_value_heads": 8,
 
 
21
  "pretraining_tp": 1,
22
  "rms_norm_eps": 1e-05,
23
- "rope_scaling": {
24
- "factor": 4.0,
25
- "original_max_position_embeddings": 8192,
26
- "type": "yarn",
27
- "finetuned": "true"
28
- },
29
- "rope_theta": 8000000.0,
30
- "router_layers": [
31
- "gate_proj",
32
- "up_proj",
33
- "down_proj"
34
- ],
35
- "router_layers_index": [
36
- 0,
37
- 1,
38
- 2,
39
- 3,
40
- 4,
41
- 5,
42
- 6,
43
- 7,
44
- 8,
45
- 9,
46
- 10,
47
- 11,
48
- 12,
49
- 13,
50
- 14,
51
- 15,
52
- 16,
53
- 17,
54
- 18,
55
- 19,
56
- 20,
57
- 21,
58
- 22,
59
- 23,
60
- 24,
61
- 25,
62
- 26,
63
- 27,
64
- 28,
65
- 29,
66
- 30,
67
- 31
68
- ],
69
  "tie_word_embeddings": false,
70
  "torch_dtype": "bfloat16",
71
- "transformers_version": "4.40.0.dev0",
72
  "use_cache": true,
73
  "vocab_size": 128256
74
- }
 
1
  {
2
+ "_name_or_path": "/content/llama-3-8b-instruct",
3
  "architectures": [
4
+ "MixtralForCausalLM"
5
  ],
6
  "attention_bias": false,
7
  "attention_dropout": 0.0,
 
11
  "hidden_size": 4096,
12
  "initializer_range": 0.02,
13
  "intermediate_size": 14336,
14
+ "max_position_embeddings": 65536,
15
+ "model_type": "mixtral",
16
  "num_attention_heads": 32,
 
17
  "num_experts_per_tok": 2,
18
  "num_hidden_layers": 32,
19
  "num_key_value_heads": 8,
20
+ "num_local_experts": 2,
21
+ "output_router_logits": false,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
24
+ "rope_scaling": null,
25
+ "rope_theta": 500000.0,
26
+ "router_aux_loss_coef": 0.001,
27
+ "router_jitter_noise": 0.0,
28
+ "sliding_window": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "tie_word_embeddings": false,
30
  "torch_dtype": "bfloat16",
31
+ "transformers_version": "4.41.0.dev0",
32
  "use_cache": true,
33
  "vocab_size": 128256
34
+ }