heegyu committed on
Commit a147d92
1 Parent(s): 318095f

Update config.json

Files changed (1)
  1. config.json +2 -86
config.json CHANGED
@@ -1,108 +1,24 @@
 {
-  "_name_or_path": "heegyu/mistralai__Mistral-7B-v0.1-mistral-7b-v0.1-lima",
   "architectures": [
     "MistralForCausalLM"
   ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "attention_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
-  "attn_mechanism": "normal",
-  "axis_dims": [
-    1,
-    -1,
-    1,
-    1
-  ],
-  "axis_names": [
-    "dp",
-    "fsdp",
-    "tp",
-    "sp"
-  ],
-  "backend": null,
-  "bias_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    null,
-    null,
-    null
-  ],
-  "bits": null,
-  "block_b": 1,
-  "block_k": 128,
-  "block_k_dkv": 128,
-  "block_k_dq": 128,
-  "block_k_major": 128,
-  "block_k_major_dkv": 128,
-  "block_k_major_dq": 128,
-  "block_q": 128,
-  "block_q_dkv": 128,
-  "block_q_dq": 128,
-  "block_q_major_dkv": 128,
   "bos_token_id": 1,
-  "easy_method": "train",
   "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
-  "key_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
   "max_position_embeddings": 32768,
   "model_type": "mistral",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
-  "number_rep_kv": 1,
-  "query_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
   "rms_norm_eps": 1e-05,
-  "rope_scaling": null,
   "rope_theta": 10000.0,
-  "scan_attention_layers": false,
-  "scan_mlp_chunk_size": 1024,
-  "scan_ring_attention": true,
   "sliding_window": 4096,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.38.1",
+  "transformers_version": "4.34.0.dev0",
   "use_cache": true,
-  "use_pjit_attention_force": false,
-  "use_scan_mlp": false,
-  "use_shard_map": false,
-  "use_sharded_kv_caching": true,
-  "value_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
   "vocab_size": 32000
-}
+}
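
After this change the file carries only standard MistralConfig fields; the removed keys (partition specs, axis names, block sizes, and similar) appear to come from a JAX-style sharded training setup and are not read by the transformers library. A minimal sketch of loading the cleaned-up config and weights with transformers follows; the repository ID is taken from the removed "_name_or_path" value and is an assumption, not part of this commit.

import torch
from transformers import AutoConfig, AutoModelForCausalLM

# Assumed repo ID, copied from the removed "_name_or_path" entry.
repo_id = "heegyu/mistralai__Mistral-7B-v0.1-mistral-7b-v0.1-lima"

# The config now contains only standard MistralConfig fields.
config = AutoConfig.from_pretrained(repo_id)
print(config.model_type)           # mistral
print(config.num_hidden_layers)    # 32
print(config.sliding_window)       # 4096

# Load the weights in the dtype declared by "torch_dtype" in the config.
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)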