Defetya committed
Commit cfe55d3
1 Parent(s): eeb68f5

Update config.json

Files changed (1):
  config.json: +1 -108
config.json CHANGED
@@ -3,96 +3,6 @@
     "Qwen2ForCausalLM"
   ],
   "attention_dropout": 0.0,
-  "attention_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
-  "attn_mechanism": "vanilla",
-  "axis_dims": [
-    1,
-    -1,
-    1,
-    1
-  ],
-  "axis_names": [
-    "dp",
-    "fsdp",
-    "tp",
-    "sp"
-  ],
-  "backend": null,
-  "bias_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    null,
-    null,
-    null
-  ],
-  "bits": null,
-  "block_b": 1,
-  "block_k": 128,
-  "block_k_dkv": 128,
-  "block_k_dq": 128,
-  "block_k_major": 128,
-  "block_k_major_dkv": 128,
-  "block_k_major_dq": 128,
-  "block_q": 128,
-  "block_q_dkv": 128,
-  "block_q_dq": 128,
-  "block_q_major_dkv": 128,
-  "bos_token_id": 151643,
-  "easy_method": "train",
-  "embd_pdrop": 0.0,
-  "eos_token_id": 151645,
-  "fcm_max_ratio": 0.0,
-  "fcm_min_ratio": 0.0,
-  "generation_attention_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    null,
-    "tp",
-    null
-  ],
-  "generation_bias_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    null,
-    null,
-    null
-  ],
-  "generation_query_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    null,
-    null
-  ],
-  "hidden_act": "silu",
-  "hidden_size": 5120,
-  "initializer_range": 0.02,
-  "intermediate_size": 13696,
-  "key_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
   "max_position_embeddings": 32768,
   "max_window_layers": 35,
   "model_type": "qwen2",
@@ -100,15 +10,7 @@
   "num_hidden_layers": 40,
   "num_key_value_heads": 40,
   "number_rep_kv": 1,
-  "query_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
+
   "resid_pdrop": 0.0,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
@@ -127,14 +29,5 @@
   "use_sharded_kv_caching": true,
   "use_sharding_constraint": false,
   "use_sliding_window": false,
-  "value_partition_spec": [
-    [
-      "dp",
-      "fsdp"
-    ],
-    "sp",
-    "tp",
-    null
-  ],
   "vocab_size": 152064
 }
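The keys removed above (the *_partition_spec lists, axis_dims/axis_names, the block_* sizes, attn_mechanism, easy_method) are not part of the standard transformers Qwen2 schema; they read as EasyDeL/JAX sharding settings that had been baked into the config. As a rough sketch of what such fields appear to encode (an assumption based on the field names, not EasyDeL's actual loader), the nested lists map naturally onto a JAX device mesh and PartitionSpec:

import jax
import numpy as np
from jax.sharding import Mesh, PartitionSpec

# "axis_names": ["dp", "fsdp", "tp", "sp"] plus "axis_dims": [1, -1, 1, 1]
# describes a 4-axis device mesh; by convention -1 absorbs all remaining devices.
axis_names = ("dp", "fsdp", "tp", "sp")
axis_dims = (1, -1, 1, 1)

n_devices = len(jax.devices())
mesh_shape = tuple(d if d != -1 else n_devices for d in axis_dims)
mesh = Mesh(np.array(jax.devices()).reshape(mesh_shape), axis_names)

# "query_partition_spec": [["dp", "fsdp"], "sp", "tp", null] would then read as a
# PartitionSpec whose first entry shards one tensor axis jointly over dp and fsdp:
query_spec = PartitionSpec(("dp", "fsdp"), "sp", "tp", None)

Dropping these keys from a Hub upload is sensible either way: they describe a training-time device topology, not the model itself.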
 
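With those keys gone, config.json loads as a plain Hugging Face Qwen2 config. A quick sanity check against values still visible in the diff (the local path is hypothetical):

from transformers import AutoConfig

# Hypothetical local checkout containing the updated config.json.
config = AutoConfig.from_pretrained("./qwen2-checkout")

assert config.model_type == "qwen2"
assert config.num_hidden_layers == 40
assert config.max_position_embeddings == 32768
assert config.vocab_size == 152064

One caveat: the commit also drops a few standard keys (hidden_size, intermediate_size, the bos/eos token ids), and transformers will silently fill those from Qwen2Config defaults; whether the defaults match this checkpoint is not visible from the diff alone.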