test
#6 by tastelikefeet · opened
- MODEL_LICENSE +10 -13
- model-00001-of-00007.safetensors +0 -3
- model-00002-of-00007.safetensors +0 -3
- model-00003-of-00007.safetensors +0 -3
- model-00004-of-00007.safetensors +0 -3
- model-00005-of-00007.safetensors +0 -3
- model-00006-of-00007.safetensors +0 -3
- model-00007-of-00007.safetensors +0 -3
- model.safetensors.index.json +0 -207
- modeling_chatglm.py +5 -12
- quantization.py +11 -10
- special_tokens_map.json +0 -1
- tokenization_chatglm.py +25 -70
- tokenizer_config.json +6 -46
MODEL_LICENSE
CHANGED
@@ -9,17 +9,14 @@ The ChatGLM3-6B License
 2. 许可授予
 
 根据本许可的条款和条件,许可方特此授予您非排他性、全球性、不可转让、不可再许可、可撤销、免版税的版权许可。
-
-经过登记的用户可以免费使用本模型进行商业活动,但必须遵守本许可的所有条款和条件。
+
 上述版权声明和本许可声明应包含在本软件的所有副本或重要部分中。
 
 3.限制
 
 您不得出于任何军事或非法目的使用、复制、修改、合并、发布、分发、复制或创建本软件的全部或部分衍生作品。
 
-
-
-您在使用中应遵循使用地所适用的法律法规政策、道德规范等要求。
+您不得利用本软件从事任何危害国家安全和国家统一、危害社会公共利益、侵犯人身权益的行为。
 
 4.免责声明
 
@@ -43,15 +40,15 @@ The ChatGLM3-6B License
 
 2. License Grant
 
-
-
-
-
+Subject to the terms and conditions of this License, the Licensor hereby grants to you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license to use the Software.
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+3. Restriction
+
+You will not use, copy, modify, merge, publish, distribute, reproduce, or create derivative works of the Software, in whole or in part, for any military, or illegal purposes.
 
-
-You are not allowed to use, copy, modify, merge, publish, distribute, copy or create all or part of the derivative works of this software for any military or illegal purposes.
-You are not allowed to use this software to engage in any behavior that endangers national security and unity, endangers social public interests and public order, infringes on the rights and interests of others such as trade secrets, intellectual property rights, reputation rights, portrait rights, and property rights.
-You should comply with the applicable laws, regulations, policies, ethical standards, and other requirements in the place of use during use.
+You will not use the Software for any act that may undermine China's national security and national unity, harm the public interest of society, or infringe upon the rights and interests of human beings.
 
 4. Disclaimer
 
model-00001-of-00007.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b1052386eac358a18add3d0f92521c85ab338979da8eeb08a6499555b857f80d
-size 1827774160
model-00002-of-00007.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f88d7f69a3940711b87308eb20dd0544df22aff81134fa0d5160d3c859f5d321
-size 1968291888
model-00003-of-00007.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:447d41b7c5e7b2558905c98733469aa9e132540c91e13c4cdd7bfc58b60cc650
-size 1927406936
model-00004-of-00007.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:386305288e2bcc4a26fcffef433c94592d155bf36e365f054f63d3e6b55a1429
-size 1815217640
model-00005-of-00007.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6c304e2821774d649997d1f5ae5fa12afd4d1dc5fea2bf6e8b829405b81fa7f2
-size 1968291920
model-00006-of-00007.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ef45bbd33d6d540d3d3523bb169b52b91b9e265176ab7efc6838233bc3b7e95d
-size 1927406960
model-00007-of-00007.safetensors
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6eb545c6aa8a839c56a78b80331fcb4fa403341925f79fb6a761e2e847bf1e40
-size 1052805400
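The seven deleted .safetensors entries above are Git LFS pointer stubs, not the weights themselves: three "key value" lines giving the spec version, the SHA-256 of the real shard, and its size in bytes. A minimal sketch of reading one (the parser is ours, for illustration only):

def parse_lfs_pointer(text: str) -> dict:
    # Each stub is three "key value" lines: version, oid, size.
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

stub = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:b1052386eac358a18add3d0f92521c85ab338979da8eeb08a6499555b857f80d\n"
    "size 1827774160\n"
)
assert parse_lfs_pointer(stub)["size"] == "1827774160"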
model.safetensors.index.json
DELETED
@@ -1,207 +0,0 @@
-{
-  "metadata": {
-    "total_size": 12487168064
-  },
-  "weight_map": {
-    "transformer.embedding.word_embeddings.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.final_layernorm.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.0.mlp.dense_4h_to_h.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.0.mlp.dense_h_to_4h.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.0.self_attention.dense.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.0.self_attention.query_key_value.bias": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.0.self_attention.query_key_value.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.1.mlp.dense_4h_to_h.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.1.mlp.dense_h_to_4h.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.1.self_attention.dense.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.1.self_attention.query_key_value.bias": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.1.self_attention.query_key_value.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.10.mlp.dense_4h_to_h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.10.mlp.dense_h_to_4h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.10.self_attention.dense.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.10.self_attention.query_key_value.bias": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.10.self_attention.query_key_value.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.11.mlp.dense_4h_to_h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.11.mlp.dense_h_to_4h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.11.self_attention.dense.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.11.self_attention.query_key_value.bias": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.11.self_attention.query_key_value.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.12.mlp.dense_4h_to_h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.12.mlp.dense_h_to_4h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.12.self_attention.dense.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.12.self_attention.query_key_value.bias": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.12.self_attention.query_key_value.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.13.input_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.13.mlp.dense_4h_to_h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.13.mlp.dense_h_to_4h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.13.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.13.self_attention.dense.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.13.self_attention.query_key_value.bias": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.13.self_attention.query_key_value.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.14.mlp.dense_4h_to_h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.14.mlp.dense_h_to_4h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.14.self_attention.dense.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.14.self_attention.query_key_value.bias": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.14.self_attention.query_key_value.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.15.mlp.dense_4h_to_h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.15.mlp.dense_h_to_4h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.15.self_attention.dense.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.15.self_attention.query_key_value.bias": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.15.self_attention.query_key_value.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.16.mlp.dense_4h_to_h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.16.mlp.dense_h_to_4h.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.16.self_attention.dense.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.16.self_attention.query_key_value.bias": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.16.self_attention.query_key_value.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.17.mlp.dense_4h_to_h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.17.mlp.dense_h_to_4h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.17.self_attention.dense.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.17.self_attention.query_key_value.bias": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.17.self_attention.query_key_value.weight": "model-00004-of-00007.safetensors",
-    "transformer.encoder.layers.18.input_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.18.mlp.dense_4h_to_h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.18.mlp.dense_h_to_4h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.18.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.18.self_attention.dense.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.18.self_attention.query_key_value.bias": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.18.self_attention.query_key_value.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.19.input_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.19.mlp.dense_4h_to_h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.19.mlp.dense_h_to_4h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.19.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.19.self_attention.dense.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.19.self_attention.query_key_value.bias": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.19.self_attention.query_key_value.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.2.mlp.dense_4h_to_h.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.2.mlp.dense_h_to_4h.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.2.self_attention.dense.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.2.self_attention.query_key_value.bias": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.2.self_attention.query_key_value.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.20.mlp.dense_4h_to_h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.20.mlp.dense_h_to_4h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.20.self_attention.dense.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.20.self_attention.query_key_value.bias": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.20.self_attention.query_key_value.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.21.mlp.dense_4h_to_h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.21.mlp.dense_h_to_4h.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.21.self_attention.dense.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.21.self_attention.query_key_value.bias": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.21.self_attention.query_key_value.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors",
-    "transformer.encoder.layers.22.mlp.dense_4h_to_h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.22.mlp.dense_h_to_4h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.22.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.22.self_attention.dense.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.22.self_attention.query_key_value.bias": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.22.self_attention.query_key_value.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.23.input_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.23.mlp.dense_4h_to_h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.23.mlp.dense_h_to_4h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.23.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.23.self_attention.dense.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.23.self_attention.query_key_value.bias": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.23.self_attention.query_key_value.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.24.input_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.24.mlp.dense_4h_to_h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.24.mlp.dense_h_to_4h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.24.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.24.self_attention.dense.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.24.self_attention.query_key_value.bias": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.24.self_attention.query_key_value.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.25.mlp.dense_4h_to_h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.25.mlp.dense_h_to_4h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.25.self_attention.dense.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.25.self_attention.query_key_value.bias": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.25.self_attention.query_key_value.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.26.mlp.dense_4h_to_h.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.26.mlp.dense_h_to_4h.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.26.self_attention.dense.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.26.self_attention.query_key_value.bias": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.26.self_attention.query_key_value.weight": "model-00006-of-00007.safetensors",
-    "transformer.encoder.layers.27.input_layernorm.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.27.mlp.dense_4h_to_h.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.27.mlp.dense_h_to_4h.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.27.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.27.self_attention.dense.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.27.self_attention.query_key_value.bias": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.27.self_attention.query_key_value.weight": "model-00007-of-00007.safetensors",
-    "transformer.encoder.layers.3.input_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.3.mlp.dense_4h_to_h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.3.mlp.dense_h_to_4h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.3.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.3.self_attention.dense.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.3.self_attention.query_key_value.bias": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.3.self_attention.query_key_value.weight": "model-00001-of-00007.safetensors",
-    "transformer.encoder.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.4.mlp.dense_4h_to_h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.4.mlp.dense_h_to_4h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.4.self_attention.dense.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.4.self_attention.query_key_value.bias": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.4.self_attention.query_key_value.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.5.mlp.dense_4h_to_h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.5.mlp.dense_h_to_4h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.5.self_attention.dense.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.5.self_attention.query_key_value.bias": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.5.self_attention.query_key_value.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.6.mlp.dense_4h_to_h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.6.mlp.dense_h_to_4h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.6.self_attention.dense.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.6.self_attention.query_key_value.bias": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.6.self_attention.query_key_value.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.7.mlp.dense_4h_to_h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.7.mlp.dense_h_to_4h.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.7.self_attention.dense.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.7.self_attention.query_key_value.bias": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.7.self_attention.query_key_value.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.8.input_layernorm.weight": "model-00002-of-00007.safetensors",
-    "transformer.encoder.layers.8.mlp.dense_4h_to_h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.8.mlp.dense_h_to_4h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.8.self_attention.dense.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.8.self_attention.query_key_value.bias": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.8.self_attention.query_key_value.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.9.mlp.dense_4h_to_h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.9.mlp.dense_h_to_4h.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.9.self_attention.dense.weight": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.9.self_attention.query_key_value.bias": "model-00003-of-00007.safetensors",
-    "transformer.encoder.layers.9.self_attention.query_key_value.weight": "model-00003-of-00007.safetensors",
-    "transformer.output_layer.weight": "model-00007-of-00007.safetensors",
-    "transformer.rotary_pos_emb.inv_freq": "model-00001-of-00007.safetensors"
-  }
-}
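For context (not part of the diff): model.safetensors.index.json is the standard sharded-checkpoint manifest, and loaders read its weight_map to find which shard holds each parameter. A minimal sketch of consuming it, assuming the file is available locally:

import json
from collections import defaultdict

with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Group parameter names by the shard file that stores them.
shards = defaultdict(list)
for name, shard in index["weight_map"].items():
    shards[shard].append(name)

# e.g. "transformer.output_layer.weight" maps to model-00007-of-00007.safetensors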
modeling_chatglm.py
CHANGED
@@ -634,8 +634,7 @@ class GLMTransformer(torch.nn.Module):
                     attention_mask,
                     rotary_pos_emb,
                     kv_caches[index],
-                    use_cache,
-                    use_reentrant=False
+                    use_cache
                 )
             else:
                 layer_ret = layer(
@@ -698,9 +697,9 @@ class ChatGLMPreTrainedModel(PreTrainedModel):
         position_ids = torch.arange(seq_length, dtype=torch.long, device=device).unsqueeze(0).repeat(batch_size, 1)
         return position_ids
 
-    def
-        if
-
+    def _set_gradient_checkpointing(self, module, value=False):
+        if isinstance(module, GLMTransformer):
+            module.gradient_checkpointing = value
 
 
 class Embedding(torch.nn.Module):
@@ -769,9 +768,6 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
     def get_input_embeddings(self):
         return self.embedding.word_embeddings
 
-    def set_input_embeddings(self, value):
-        self.embedding.word_embeddings = value
-
     def get_prompt(self, batch_size, device, dtype=torch.half):
         prefix_tokens = self.prefix_tokens.unsqueeze(0).expand(batch_size, -1).to(device)
         past_key_values = self.prefix_encoder(prefix_tokens).type(dtype)
@@ -1004,10 +1000,7 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         content = ""
         history = deepcopy(history)
         for response in output.split("<|assistant|>"):
-            if "\n" in response:
-                metadata, content = response.split("\n", maxsplit=1)
-            else:
-                metadata, content = "", response
+            metadata, content = response.split("\n", maxsplit=1)
             if not metadata.strip():
                 content = content.strip()
                 history.append({"role": "assistant", "metadata": metadata, "content": content})
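Reviewer note (not part of the diff): restoring _set_gradient_checkpointing brings back the legacy Transformers hook that gradient_checkpointing_enable() dispatches to, and dropping use_reentrant=False from the checkpointed layer call keeps the code compatible with older torch.utils.checkpoint signatures that reject that keyword. A minimal sketch of the enable path under those assumptions:

from transformers import AutoModel

# Sketch only; repo id taken from the new tokenizer_config.json ("THUDM/chatglm3-6b").
model = AutoModel.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)

# In Transformers versions with the legacy hook, this applies
# _set_gradient_checkpointing(module, value=True) over all submodules,
# flipping gradient_checkpointing on the GLMTransformer stack.
model.gradient_checkpointing_enable()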
quantization.py
CHANGED
@@ -1,17 +1,20 @@
+from torch.nn import Linear
 from torch.nn.parameter import Parameter
+
 import bz2
 import torch
 import base64
 import ctypes
 from transformers.utils import logging
+
 from typing import List
+from functools import partial
 
 logger = logging.get_logger(__name__)
 
 try:
     from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up
 
-
     class Kernel:
         def __init__(self, code: bytes, function_names: List[str]):
             self.code = code
@@ -21,7 +24,6 @@ try:
             for name in self._function_names:
                 setattr(self, name, KernelFunction(self._cmodule, name))
 
-
     quantization_code = "$QlpoOTFBWSZTWU9yuJUAQHN//////////f/n/8/n///n//bt4dTidcVx8X3V9FV/92/v4B7/AD5FBQFAAAChSgKpFCFAFVSigUAAAEKhSgUUqgFBKigqVREQAABQBQIANDTTIGI00BkZBkNGE0A0BkBkGQGRkaNAaAGQNBoGgDIAAYIGTI0DQAQAaGmmQMRpoDIyDIaMJoBoDIDIMgMjI0aA0AMgaDQNAGQAAwQMmRoGgAgA0NNMgYjTQGRkGQ0YTQDQGQGQZAZGRo0BoAZA0GgaAMgABggZMjQNABABoaaZAxGmgMjIMhowmgGgMgMgyAyMjRoDQAyBoNA0AZAADBAyZGgaAAmqU1NEgJqnptU/Sn4jRR6J6epk2pqb1Q/SgAPUGgyNNGjQ2SBpoAZAAGg0NB6mgDIAAAAA2oaApSREBNAARhGiYEaEwU8pvImlP0k2aam1GaGqbFNM1MHpTwmkepmyU9R6nqPKekHqNNPUxNGhp6n6p6QaZ6o9TG1GMqcoV9ly6nRanHlq6zPNbnGZNi6HSug+2nPiZ13XcnFYZW+45W11CumhzYhchOJ2GLLV1OBjBjGf4TptOddTSOcVxhqYZMYwZXZZY00zI1paX5X9J+b+f4e+x43RXSxXPOdquiGpduatGyXneN696M9t4HU2eR5XX/kPhP261NTx3JO1Ow7LyuDmeo9a7d351T1ZxnvnrvYnrXv/hXxPCeuYx2XsNmO003eg9J3Z6U7b23meJ4ri01OdzTk9BNO96brz+qT5nuvvH3ds/G+m/JcG/F2XYuhXlvO+jP7U3XgrzPN/lr8Sf1n6j4j7jZs+s/T0tNaNNYzTs12rxjwztHlnire3Nzc3N1wuBwOBwXBvZfoHpD7rFmR99V5vj3aXza3xdBbXMalubTg/jIv5dfAi54Pdc75j4z412n3Npj3Ld/ENm7a3b/Cod6h/ret1/5vn/C+l+gdslMvgPSLJ8d8q+U66fevYn/tW1chleEtNTGlcHCbLRlq0tHzF5tsbbZZfHjjLgZu42XCuC3NrdjTasZGNzgxPIrGqp7r3p7L2p5XjnpPSmTd5XtzqnB6U87zzg1Ol0zd0zsLszxR6lkxp35u6/teL0L0W922cR7Lu1lpL9CsHirzuM2T+BgsyViT6LHcm0/Vr6U/7LGGyJeqTEjt0PHWhF5mCT7R9mtlDwriYv0Tyr/OxYt6qp5r0mPVT0608TqnqMZaarU2nFwrTzzlrs1ed7z1ux60wyr4ydCaTi3enW8x68x0zU7tXSlcmPSW1mGpWJMg4zmPC2lK96tp0OE80y4MfEvnZj8zGluR6b22ki1Ou9V2nCd9xovcPvcYMZYy0lvN60ScZ45vN6yeCeeXFb1lVjnnCar5fwXwE2bzJ4HI1XVPXfXZMm44GUsMpYsmLB65TuVdm0cl0b+i/wGNN66XjeV7zuPpHcnK/juhhjdfId5jMdE5nN0dGmmm2zZs2cexD5n9p/dY352XsvXHaZNWWsmmS1atjR452nYudzvqv2HMRyvNNnlMcDl3R2+yx2uVrBubTW9icHDVtbNXlZm7jma1rM4VurZZd2y6nUau7ZXZ7bVU+mnoOVxZGMrVmvX60605JwmzGZhhhjTWtaaaMaaGTGmNMZasY0iX8VMUl8eepaIrzGSpemWOQyZORk2bNpjUybMmxqYmknCGCFynutfksaZpjTNMaaatM0xsxcGR0sociNqxNSmhhR1ZJPbsn8qyF0t2qH6iYBclclalbtTTcHTDsPaX6rlnElph2Jyumumtynv2Kk8GI7rsvXbIcJgHJOSaSXnnGaI3m87RtVXJOZ/YtgdTE6Wpha6ZlE8ayXkef1fh602r2WwvfMXtMdLlkfnLFdYYwYso+bWqm7yJqHXZGw2nrS5ZanSYnWlxBxMF1V940K2wdrI7R6OYf7DGGamMmTSbRhlS45xmVOumF1EyPCmHrrN8wwZOOrdNtLeMtzFzDlWnfTBxMk2NaXIZHBYxYLD4w8yju0ao65Vz1OIXoS9dLanwCe1PWrYuWMqf1if1z2k2yYfKJ741PDgno1ZQ8DRqvUny3mNoWTzGO6m1DkrJI8JiR5cSd+vZdGOO8nrMoc5+NDUFsMSXaZJeNlMmGLtJsovOsUp7I9S5VojKxF6bTVEelXqlfJobQr3LozSh2Jk7VcrVMfhXqszGWMzNqGhqZY0OadxkyyMssKugZR0KNFXBHlqwmJgTE/BNVMk6ItJXZMR0H47GpXv/DMOvNkmVuaV1PRfEdxuqc7Hcd+ZV/zTLaRxWk0nl9CdCeM6mn5rstHIBcpiuwmUZXeq81DacHI2rmrZ5SuE5mOZd6LQrZg9mx32TprA8BMo5jKN6yLTCi3WzQaZSuhzTtM1fUTGVpG8Tw+KXI0tjEpiWxtLYynOlktSbVlaI5kxP8TDH8kx50xoxi5KcA4pcja8KWLRlO/Ks6q06ergnvm1ca3Tq8Uw7LTUsmWyctXPWmpitl/uvGcWTGXGuAXDfhqazGmjkxcJW5hMMMMpYsXl2TZYtVOddG3XCarUt6Ptq9CZXSNzyuRzqRZOjsxdBbFVz6OA5HI43r1jityVlVpVkxmOsyaYWE1NTGq1sOVh36mHMcxtSvcy70edG0ZGR3I1Go1GRlV7mWWo1G0ZGRqlvH40l7o4m5xMWLLLYyNjnqc8556mdPqLJ31n/1nWOncxzG1tizrHs/Z+d2vP/B/l8wdJ6rHUn2nbbDq4p6htFtYzMMMTaZis1K5GKzGNmxhmUx2DDlZ/qNnIx41xnaMfCZWYaZWtNLTNW8ND4Fw1MyZOCdM428suKG1ehW8TesOydg7J+YYcD4cYR+8dFK6M4E3HM9ZfRNNL+Sn6rsl4DsrDl2HpPCnfxjGXtbZtYys1ttlyJ4T+BvexjGWRjMszK4Jpc77D3GyuVD7q0+G8m9G+2+rGm7cOR2y7FdtY2XUYx/oNlfRYxhMYyYZkyyg55enna9Kt/FFi6GMMwYwdwxWgxGMLKYmUyGExTKMZkMFhkymKuh0NOBNnBu+23LdwDoZYYzGGMxtORaTU1pjTGWTTGGtMrNWUsyyTTLLG1qy2ZjbK2DBllWqxMtBMaYZQmcE7zvvRcTkclUwdkxTaSdyySt/7fpL+T1v516Ji97fwr5JbLu305zMn5+GMTTZ9F+y7ExwmGVfG44yxn3dLv6l5i+Wth1jCrDq21nW9LqvvDzz3Vf3LLH/O/32TJ/erx3bXftO4eF+G956D952K/An4NfvOpjFjExjevP/UmE0fIoZXx6/w6lX/no3D0bLt+ixjieBM6ksRd0yB4Lt2SwYNE+gd1detlZWUnpiZfGfFaK+4PyCa/v18V8X75pe9fLXzp7l3VjF76vWZmHwGz1IZNWT7b8yddJ4q5kyrVdfru6atWc7bVYztL9Jf4GXvT+Y8m9/YsXP6H018a8D4XVOqvfzqeR+6yZOD8dPv0+U7/q5Pl+2dNb0MjzGVH5p6MNQ7cOWvw62U9aHE8DprDek+McLyvDz+te+9Zhq5+YTruufMcWMabqysTmZVWjKPfnK0wyVcrsuhjZRdLkHNvD72b9abriOSGIxiLixMOoalNPXzy+wT/tf+U6HHONfsz+xe8ufHBdQWWGWLA9if0rsnmrxK5LvRZQeWsTCsrmOYy8VteVfuRfcVTtDLItLIsMYxZLdU/DbtSemxF6Z6Zo5WBXE4tFdCyVMMXMTEMZXVlS6Xec2T4e0tHsRcEuWshcJ2YsNF5rUx1E8ifCq6Z+ZP7qdCeu/aTwFd53l16/o0NOw6O3dLavP4Hbi4RdmuDk6DoYaninC0+o4uZjbJ7Rxeu0/FbuFg+q7DVS6fQe0rZ6NDGUNNU6DEqOaLTicKnYZMnBWruljQxoaS3dZhocDge0bSTyOvdAbG5hxe2xji7E/L55xX13wWNDi6HCekcFxfCPGxY0MXC+s7afWaMdDyjyr+o8Rudm/NabOZvdl274zH4f5XK9z6On1Pe/K5TdPAslg77BjuO6Y3eO7GqvOPG/stknp1leyvLL0Z7bl9I4noMvLkzytLhWYzrOZzLXCORe028rORzOg4N/L0HlMOQ3Pgmnbb6KczlabORpu980q37TBqRu0/p3PO6234Bl03Ynuz+9W7gnsEcmvYaYY3aMYY0wx3pYd+ujsXauWdaY5Xkbtl23fPzFHiDB/QMo0yFjBllYxTQYYyxkrwn7JufwJ/PfgJ+C83X69ni6zvXcnyXabv0ncbLwsceS+RNlyN2mnneJtX0ngYO0+e+0+UnA+Wch3ji8hj5an4h+i6XBySU4n+R0roVcbw5yvHrmr4Yw8Y7x6c+9POPYHI5HI5HI5HI5HGXGww4nE4nrVyOR8XeqPEO7PLOiukYa3Novk5hV4cdtYZLI93e+uxff2jRo0aNGjRo0aNG1bVtW1dy3m83m8+tQ5ZzHw3nObwOu8La9Rc1dtkdS8A3eTk823tnktXWlxN6Oixe06zrN70Isd9jiOgZFq9yfkPqP/SLhN2Myl8jDM43bl1nbcb4cO57jlh8Jow6pzXZdL4dyODTuuhu77FyO27DdwdRxmvO+O+3N2+BdqyTwLHVczDVY4UPE4O66/ZO2cx1LFzVdSXtF7G4HMbrauOHRw6c8FdZ5m9fHZHYZXfTlZquyynSyTTKke6vcffSD9pzPA/G7n7jxPmuhc1DHMynPMrGL6AdewYmwu5ko+UUyTwrMv27rPH1v1nGqd87+p6N6LU8k3NEng53xXyHS97+44OSg/sy/hn+Se6yfYNjW0/uTgP+PvWYzLMmjhcLB/gGpri6H83/84eUXWT6T9Hsv7785z/7z4icpW+zfXypuR7rx/gMdZb1/wC678pcs8/2a3mDitGHxl9mfPlll5MafWWqxk/eYuTDgcNMzDGWLWvsuglNxs53GtN6uWpktlW1tZZYcuinMMWmnNnJydze3b2Y1McBxrBkXw799izLMZZYyy0TkbsGM4p03S2uVu5s/XXUdSdec6smVxZYYGpVmT8A+8ajuEyV5FatkvVru2x6uxGXXbH4A+jvgP4GMYy3iPLXzq/6z65+E005ey+cwMZD3fZcqc6xpjTFjQ0P3U+e++cPYmTIwj0nrK5NPTfl3WvpfLtXDcb2HQMudYOxFXQBor4L4T6vrOauFctYXJQ++NUWmJe5bmx1jDiZS1dTqWxo4GR8jm3fttpmPHppk9PEyv4/y8/sO07XacOmcqc0x2Vi9BvNJvN5oW8x4mOsydpidRxMYJPx06m1bqPzq9KtK8sxXNXFodD/+MYYaJTLwOhc9brCsV18oOR1i4tXChyTkq4lf4y1Ke+9axjDHqs1mfBbMXuP4Hzi+X7t8vzv7bHerrUPgPCxhjre4fXdfLNtNM+Jd+Zdh8xd8wP87uNPoPgv4W7/5P2BuxfsMabNnMnza+54Pdi5U671GPZY8CehX8Voeoo7FHpkeEc6715FwHZrIrUrHaviPUbPZHND+IhczrP6FcYvhOZ0Di/ETt0OI+YwNWR9r7tpf6WDeZKZDB1+z2IthOl1mPyb5FluvEx9h9d0NnM0Y1XPFkWIsk1WotJ0PBMmkvjvQTd0e71tfeV+8r8lQ/tpzpsmxJ+InrI/dj2UajUajVTUajatRqNRtGo1Go1Go4wjeMpZFMVV9CHbofPraLsJ3JpWV2XOoanCuFky4y3PPNxucK2uKC1Lbdb1eo+m5XomN6HfeZsabHLHRX/K+offtNGGmHWctcVcG44MdSqsOLY9VzX+Zxfxn2HPdWTpzWvkrtJ8M5zorrKcquRytJ5N5DZmcaW02l76nWO+BqPXm1A2Ry/0q71dH/mqrqeFjkYxjEXtsX8qubTk67rGycyqsdm4tZx5D6D5hhi0waaWmiaMP81Yjii5qxPlPuU/GfTL1Y5E6Jyfiq63qTa39A4J0sOGDgO9WF9bOXl0XfPRbsY2bPNKPy1YrFYrFYmRhhlTIyMjJWJYZHXuCXI8OoXsvfljGLFicNifpp2XunoPiG1wtx3p1Tah+/DD66OnVtVXP9rKbVxOnL0tR/rHtqB5UDErUVcl11D4qqvjpOcxX7armUNJB3LpW6bxVvD08e8h3odKKvyCFZBdSh2FVcST9xV3n3T8t1j7Kr9qgrqXg+13Pt5U7JCvFXVIV1YG5lRhkVYZJYYDDD4KOIMoHCp26WS8GB7uBh2zIdgq/PKyInjV2STShuoapUdCpX1yTwqq/z1VvET7Kh5nVPkO8YyxjLt2MaaMmWTLQvx3qnzltnXW0p2jxgbEtSny/Osv8Y9pLMXYoHVPAhkVdWVeODhR6q9/Sxe2liwwZWMVvFXfRkeIDxAePUPIrdJ4ey6yquzH+PD/bUOWAu05qVHtFd8rrKHSoeNIOUqrYr3FXyToqfYJgwmJdKpXXOwYYegNNGMzfZPp/t3t/DVs4zjNTN61rRqaWaa4NYbRjTa0tWwy2Y2tGN8ZO8ofNKq4j9SL7I+cSm4/6ovLV5HNXLI0jJidwrtk6ynCaP6Z++GjRlWS3tLeW129Mi9evxU9mtz6s5J3Z7M2ngTgnKvmpomxpaLCzPfmx0JWE+m3NLDDGOX47RctdYYNK5jakdqLkRlI39n590T5zctGSwwZZDJj6kW8XSi6ot2MmWWJ0DUT3nuvebBudScjZ79g8cWJ8av0k+/bE5WKd5MdbFpbDVMxu1DVMmtNZGJvq1mtRbn6M+g/kP0FwDwr7quZs7xosNGpbscyxhhd9TyJyFwbLcxlTasg75vW7TsV5K7ji44XPMMrdoj+Y3rT0Hie62nlYV/pwczzOmdLqLhYkzGMzCZWGMQzGMSsZYY6Di1t4nlJ+Em63mJxrVLxPbYxNEdgc1dU2iOKyoYYWjNrEeHTYybVk0atSa7ehuwsWMWTqn1TrnS6hYsi71d1+s+k+ic70e20fzE/VaTdxT9ZtU4GIXdeNx3X77guYYfpHeTQjaMX6brOu4OY4K7Y2d9mbHarI5ox3p4GpJ2Vd/Tst60f7j999pppjR+Q/Qf8J/VaORs3cji7FfFuN61+ui9s8hix1OCh5KGVV23BPXvZfz3CLyHpix+exi8z/KnCnosY2eunor+cxyPO/xJ0vKey9OvE9VjqaYu0x3Z3jd6o2b1T12D+F8l232lwaaacD5LE8LBxu7WTlbWraWpew8Xexjel3E+wWD4APITdNqR8F3R3T0lunCQ4GaE9R37DxeCYfcHi4xci5ovKfxVs55y2hf+65E/Xdp6jR5nrebTmi5incpkyOjs50JvrZwstbbW6kfuuQw+2mykf/EXNFzxfKTrxew929TR6bWnGL//F3JFOFCQT3K4lQ"
 
     kernels = Kernel(
@@ -120,12 +122,11 @@ def extract_weight_to_half(weight: torch.Tensor, scale_list: torch.Tensor, sourc
 
 
 class QuantizedLinear(torch.nn.Module):
-    def __init__(self, weight_bit_width: int, weight, bias=None, device="
+    def __init__(self, weight_bit_width: int, weight, bias=None, device="cpu", dtype=None, empty_init=False, *args,
+                 **kwargs):
         super().__init__()
-        weight = weight.to(device)  # ensure the weight is on the cuda device
-        assert str(weight.device).startswith(
-            'cuda'), 'The weights that need to be quantified should be on the CUDA device'
         self.weight_bit_width = weight_bit_width
+
         shape = weight.shape
 
         if weight is None or empty_init:
@@ -153,7 +154,7 @@ def quantize(model, weight_bit_width, empty_init=False, device=None):
     for layer in model.layers:
         layer.self_attention.query_key_value = QuantizedLinear(
             weight_bit_width=weight_bit_width,
-            weight=layer.self_attention.query_key_value.weight,
+            weight=layer.self_attention.query_key_value.weight.to(torch.cuda.current_device()),
             bias=layer.self_attention.query_key_value.bias,
             dtype=layer.self_attention.query_key_value.weight.dtype,
             device=layer.self_attention.query_key_value.weight.device if device is None else device,
@@ -161,7 +162,7 @@ def quantize(model, weight_bit_width, empty_init=False, device=None):
         )
         layer.self_attention.dense = QuantizedLinear(
             weight_bit_width=weight_bit_width,
-            weight=layer.self_attention.dense.weight,
+            weight=layer.self_attention.dense.weight.to(torch.cuda.current_device()),
             bias=layer.self_attention.dense.bias,
             dtype=layer.self_attention.dense.weight.dtype,
             device=layer.self_attention.dense.weight.device if device is None else device,
@@ -169,7 +170,7 @@ def quantize(model, weight_bit_width, empty_init=False, device=None):
         )
         layer.mlp.dense_h_to_4h = QuantizedLinear(
             weight_bit_width=weight_bit_width,
-            weight=layer.mlp.dense_h_to_4h.weight,
+            weight=layer.mlp.dense_h_to_4h.weight.to(torch.cuda.current_device()),
             bias=layer.mlp.dense_h_to_4h.bias,
             dtype=layer.mlp.dense_h_to_4h.weight.dtype,
             device=layer.mlp.dense_h_to_4h.weight.device if device is None else device,
@@ -177,7 +178,7 @@ def quantize(model, weight_bit_width, empty_init=False, device=None):
         )
         layer.mlp.dense_4h_to_h = QuantizedLinear(
             weight_bit_width=weight_bit_width,
-            weight=layer.mlp.dense_4h_to_h.weight,
+            weight=layer.mlp.dense_4h_to_h.weight.to(torch.cuda.current_device()),
             bias=layer.mlp.dense_4h_to_h.bias,
             dtype=layer.mlp.dense_4h_to_h.weight.dtype,
             device=layer.mlp.dense_4h_to_h.weight.device if device is None else device,
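Reviewer note (not part of the diff): the in-constructor weight.to(device) plus CUDA assert is replaced by moving each weight to the current CUDA device at the quantize() call sites. A minimal sketch of that convention (the helper name is ours, for illustration):

import torch

def to_current_cuda(weight: torch.Tensor) -> torch.Tensor:
    # Mirrors the weight.to(torch.cuda.current_device()) calls in quantize();
    # returns the tensor unchanged when no GPU is visible.
    if torch.cuda.is_available():
        return weight.to(torch.cuda.current_device())
    return weight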
special_tokens_map.json
DELETED
@@ -1 +0,0 @@
-{}
tokenization_chatglm.py
CHANGED
@@ -1,6 +1,6 @@
 import json
 import os
-import re
+import torch
 from typing import List, Optional, Union, Dict
 from sentencepiece import SentencePieceProcessor
 from transformers import PreTrainedTokenizer
@@ -8,9 +8,6 @@ from transformers.utils import logging, PaddingStrategy
 from transformers.tokenization_utils_base import EncodedInput, BatchEncoding
 
 
-logger = logging.get_logger(__name__)
-
-
 class SPTokenizer:
     def __init__(self, model_path: str):
         # reload tokenizer
@@ -24,30 +21,17 @@ class SPTokenizer:
         self.pad_id: int = self.sp_model.unk_id()
         assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()
 
-        role_special_tokens = ["<|system|>", "<|user|>", "<|assistant|>", "<|observation|>"]
-        special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"] + role_special_tokens
+        special_tokens = ["[MASK]", "[gMASK]", "[sMASK]", "sop", "eop", "<|system|>", "<|user|>", "<|assistant|>",
+                          "<|observation|>"]
         self.special_tokens = {}
         self.index_special_tokens = {}
         for token in special_tokens:
            self.special_tokens[token] = self.n_words
            self.index_special_tokens[self.n_words] = token
            self.n_words += 1
-        self.role_special_token_expression = "|".join([re.escape(token) for token in role_special_tokens])
-
-    def tokenize(self, s: str, encode_special_tokens=False):
-        if encode_special_tokens:
-            last_index = 0
-            t = []
-            for match in re.finditer(self.role_special_token_expression, s):
-                if last_index < match.start():
-                    t.extend(self.sp_model.EncodeAsPieces(s[last_index:match.start()]))
-                t.append(s[match.start():match.end()])
-                last_index = match.end()
-            if last_index < len(s):
-                t.extend(self.sp_model.EncodeAsPieces(s[last_index:]))
-            return t
-        else:
-            return self.sp_model.EncodeAsPieces(s)
+
+    def tokenize(self, s: str):
+        return self.sp_model.EncodeAsPieces(s)
 
     def encode(self, s: str, bos: bool = False, eos: bool = False) -> List[int]:
         assert type(s) is str
@@ -86,40 +70,27 @@
         """Converts an index (integer) in a token (str) using the vocab."""
         if index in self.index_special_tokens:
             return self.index_special_tokens[index]
-        if index in [self.eos_id, self.bos_id, self.pad_id] or index < 0
+        if index in [self.eos_id, self.bos_id, self.pad_id] or index < 0:
             return ""
         return self.sp_model.IdToPiece(index)
 
 
 class ChatGLMTokenizer(PreTrainedTokenizer):
-
     vocab_files_names = {"vocab_file": "tokenizer.model"}
+
     model_input_names = ["input_ids", "attention_mask", "position_ids"]
 
-    def __init__(
-            self,
-            vocab_file,
-            padding_side="left",
-            clean_up_tokenization_spaces=False,
-            encode_special_tokens=False,
-            **kwargs
-    ):
+    def __init__(self, vocab_file, padding_side="left", clean_up_tokenization_spaces=False, **kwargs):
         self.name = "GLMTokenizer"
+
         self.vocab_file = vocab_file
         self.tokenizer = SPTokenizer(vocab_file)
         self.special_tokens = {
             "<bos>": self.tokenizer.bos_id,
             "<eos>": self.tokenizer.eos_id,
-            "<unk>": self.tokenizer.pad_id,
             "<pad>": self.tokenizer.pad_id
         }
-        self.encode_special_tokens = encode_special_tokens
-
-        super().__init__(
-            padding_side=padding_side,
-            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
-            **kwargs
-        )
+        super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=clean_up_tokenization_spaces, **kwargs)
 
     def get_command(self, token):
         if token in self.special_tokens:
@@ -129,40 +100,24 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
 
     @property
     def unk_token(self) -> str:
-        return
+        return "<unk>"
 
     @property
     def pad_token(self) -> str:
-        return
+        return "<unk>"
 
     @property
-    def
-        return self.
-
-    @property
-    def unk_token_id(self) -> int:
-        return self.get_command("<unk>")
+    def pad_token_id(self):
+        return self.get_command("<pad>")
 
     @property
-    def
-        return
+    def eos_token(self) -> str:
+        return "</s>"
 
     @property
     def eos_token_id(self):
         return self.get_command("<eos>")
 
-    @unk_token.setter
-    def unk_token(self, value):
-        logger.warning("Setting unk_token is not supported, use the default one.")
-
-    @pad_token.setter
-    def pad_token(self, value):
-        logger.warning("Setting pad_token is not supported, use the default one.")
-
-    @eos_token.setter
-    def eos_token(self, value):
-        logger.warning("Setting eos_token is not supported, use the default one.")
-
     @property
     def vocab_size(self):
         return self.tokenizer.n_words
@@ -174,7 +129,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         return vocab
 
     def _tokenize(self, text, **kwargs):
-        return self.tokenizer.tokenize(text, encode_special_tokens=self.encode_special_tokens)
+        return self.tokenizer.tokenize(text)
 
     def _convert_token_to_id(self, token):
         """ Converts a token (str) in an id using the vocab. """
@@ -240,7 +195,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         return self.batch_encode_plus([input_ids], return_tensors="pt", is_split_into_words=True)
 
     def build_inputs_with_special_tokens(
-            self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
+        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
     ) -> List[int]:
         """
         Build model inputs from a sequence or a pair of sequence for sequence classification tasks by concatenating and
@@ -265,12 +220,12 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         return token_ids_0
 
     def _pad(
-            self,
-            encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
-            max_length: Optional[int] = None,
-            padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
-            pad_to_multiple_of: Optional[int] = None,
-            return_attention_mask: Optional[bool] = None,
+        self,
+        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
+        max_length: Optional[int] = None,
+        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
+        pad_to_multiple_of: Optional[int] = None,
+        return_attention_mask: Optional[bool] = None,
     ) -> dict:
         """
         Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
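Reviewer note (not part of the diff): after this change SPTokenizer.tokenize is a plain SentencePiece call, and role markers such as <|user|> exist only as extra ids registered in __init__; they are no longer regex-split during tokenization. A minimal sketch of the simplified path (the model path is assumed):

from sentencepiece import SentencePieceProcessor

sp = SentencePieceProcessor(model_file="tokenizer.model")  # path assumed
# Same call the new tokenize() makes; role markers now tokenize as plain text.
pieces = sp.EncodeAsPieces("<|user|> hello")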
tokenizer_config.json
CHANGED
@@ -1,52 +1,12 @@
 {
-  "added_tokens_decoder": {
-    "64790": {
-      "content": "[gMASK]",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "64792": {
-      "content": "sop",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "64795": {
-      "content": "<|user|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "64796": {
-      "content": "<|assistant|>",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    }
-  },
+  "name_or_path": "THUDM/chatglm3-6b",
+  "remove_space": false,
+  "do_lower_case": false,
+  "tokenizer_class": "ChatGLMTokenizer",
   "auto_map": {
     "AutoTokenizer": [
       "tokenization_chatglm.ChatGLMTokenizer",
       null
-    ]
-  },
-  "chat_template": "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
-  "clean_up_tokenization_spaces": false,
-  "do_lower_case": false,
-  "eos_token": "</s>",
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<unk>",
-  "padding_side": "left",
-  "remove_space": false,
-  "tokenizer_class": "ChatGLMTokenizer",
-  "unk_token": "<unk>"
+    ]
+  }
 }
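Reviewer note (not part of the diff): with eos_token, pad_token, unk_token, and the chat template dropped from tokenizer_config.json, those values must now come from the properties added to ChatGLMTokenizer in tokenization_chatglm.py. A minimal sketch of what that implies when loading (repo id taken from the new config; behavior assumed for Transformers versions that respect class-level properties):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("THUDM/chatglm3-6b", trust_remote_code=True)
# Resolved via the class properties rather than the JSON config:
print(tok.eos_token)  # "</s>"
print(tok.pad_token)  # "<unk>"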