Commit 10d4977 by duzx16
Parents (2): 44011aa ef8e833

Merge branch 'main' of https://huggingface.co/THUDM/glm-4-9b

Files changed (3):
  1. LICENSE +1 -1
  2. README.md +7 -8
  3. modeling_chatglm.py +4 -7
LICENSE CHANGED
@@ -45,7 +45,7 @@ The glm-4-9b License
 
 2. License
 
-Subject to the terms and conditions of this License, Licensor hereby grants you a non-exclusive, worldwide, irrevocable, non-sublicensable, revocable, photo-free copyright license.
+Under the terms and conditions of this license, the Licensor hereby grants you a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable, royalty-free copyright license.
 This license allows you to use all open source models in this repository for free for academic research. For users who wish to use the models for commercial purposes, please do so [here](https://open.bigmodel.cn/mla/form)
 Complete registration. Registered users are free to use this model for commercial activities, but must comply with all terms and conditions of this license.
 The copyright notice and this license notice shall be included in all copies or substantial portions of the Software.
README.md CHANGED
@@ -2,15 +2,15 @@
 license: other
 license_name: glm-4
 license_link: https://huggingface.co/THUDM/glm-4-9b/LICENSE
-
 language:
-- zh
-- en
+- zh
+- en
 tags:
-- glm
-- chatglm
-- thudm
+- glm
+- chatglm
+- thudm
 inference: false
+pipeline_tag: text-generation
 ---
 
 # GLM-4-9B
@@ -62,5 +62,4 @@ GLM-4 模型的权重的使用则需要遵循 [LICENSE](LICENSE)。
 pages={320--335},
 year={2022}
 }
-```
-
+```
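
After this commit the README's YAML front matter should match the "+" side of the first hunk above, with `pipeline_tag: text-generation` declaring which pipeline category the model card advertises on the Hub. The snippet below is an illustrative sanity check only, not part of the repository: the front-matter string is transcribed from the diff rather than read from the repo, and it simply parses the block with PyYAML to confirm the new field and the `language`/`tags` lists come through.

```python
# Illustrative check only: the front-matter string below is transcribed from
# the "+" side of the README diff, not fetched from the Hub.
import yaml

front_matter = """\
license: other
license_name: glm-4
license_link: https://huggingface.co/THUDM/glm-4-9b/LICENSE
language:
- zh
- en
tags:
- glm
- chatglm
- thudm
inference: false
pipeline_tag: text-generation
"""

meta = yaml.safe_load(front_matter)
assert meta["pipeline_tag"] == "text-generation"  # added by this commit
assert meta["language"] == ["zh", "en"]
assert meta["inference"] is False
print(meta["tags"])  # ['glm', 'chatglm', 'thudm']
```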
 
modeling_chatglm.py CHANGED
@@ -253,15 +253,12 @@ class CoreAttention(torch.nn.Module):
         # This is actually dropping out entire tokens to attend to, which might
         # seem a bit unusual, but is taken from the original Transformer paper.
         attention_probs = self.attention_dropout(attention_probs)
-        # =========================
-        # Context layer. [sq, b, hp]
-        # =========================
-
-        # value_layer -> context layer.
-        # [sk, b, np, hn] --> [b, np, sq, hn]
 
+        # query layer shape: [b * np, sq, hn]
+        # value layer shape: [b, np, sk, hn]
+        # attention shape: [b, np, sq, sk]
         # context layer shape: [b, np, sq, hn]
-        output_size = (value_layer.size(1), value_layer.size(2), query_layer.size(0), value_layer.size(3))
+        output_size = (value_layer.size(0), value_layer.size(1), query_layer.size(1), value_layer.size(3))
         # change view [b * np, sk, hn]
         value_layer = value_layer.view(output_size[0] * output_size[1], value_layer.size(2), -1)
         # change view [b * np, sq, sk]
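
The `output_size` fix follows from the layouts documented in the added comments: `value_layer` is now batch-first (`[b, np, sk, hn]`) and `query_layer` is flattened to `[b * np, sq, hn]`, so the old indices, written for the earlier seq-first `[sq, b, np, hn]` layout, pick the wrong dimensions. The sketch below is a standalone shape check with made-up sizes, not the model code itself; it only reproduces the few lines around the changed statement.

```python
# Standalone shape check with dummy tensors (assumed sizes, not model code).
import torch

b, np_, sq, sk, hn = 2, 4, 5, 7, 16            # batch, heads, query len, key len, head dim

query_layer = torch.randn(b * np_, sq, hn)     # [b * np, sq, hn]
value_layer = torch.randn(b, np_, sk, hn)      # [b, np, sk, hn]
attention_probs = torch.randn(b, np_, sq, sk)  # [b, np, sq, sk]

# Old indexing (written for the former seq-first layout) picks the wrong dims here:
old_size = (value_layer.size(1), value_layer.size(2), query_layer.size(0), value_layer.size(3))
print(old_size)      # (4, 7, 8, 16) -> (np, sk, b * np, hn), not [b, np, sq, hn]

# Fixed indexing from this commit:
output_size = (value_layer.size(0), value_layer.size(1), query_layer.size(1), value_layer.size(3))
print(output_size)   # (2, 4, 5, 16) -> (b, np, sq, hn)

# With the corrected size, the downstream reshapes and the bmm line up:
value_layer = value_layer.view(output_size[0] * output_size[1], value_layer.size(2), -1)     # [b * np, sk, hn]
attention_probs = attention_probs.view(output_size[0] * output_size[1], output_size[2], -1)  # [b * np, sq, sk]
context_layer = torch.bmm(attention_probs, value_layer).view(*output_size)                   # [b, np, sq, hn]
print(context_layer.shape)  # torch.Size([2, 4, 5, 16])
```

Running this prints the mismatched tuple for the old indexing and `torch.Size([2, 4, 5, 16])`, i.e. `[b, np, sq, hn]`, for the corrected path, matching the context-layer shape the comment promises.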