NewBreaker committed on
Commit 3129ec8
1 Parent(s): 8f59db6
Files changed (36)
  1. .gitattributes +31 -7
  2. ChatGLM-6b-int4(origin)/.gitattributes +0 -34
  3. ChatGLM-6b-int4(origin)/.idea/.gitignore +8 -0
  4. ChatGLM-6b-int4(origin)/.idea/ChatGLM-6b-int4(origin).iml +12 -0
  5. ChatGLM-6b-int4(origin)/.idea/inspectionProfiles/Project_Default.xml +5 -0
  6. ChatGLM-6b-int4(origin)/.idea/inspectionProfiles/profiles_settings.xml +6 -0
  7. ChatGLM-6b-int4(origin)/.idea/misc.xml +7 -0
  8. ChatGLM-6b-int4(origin)/.idea/modules.xml +8 -0
  9. ChatGLM-6b-int4(origin)/.idea/vcs.xml +6 -0
  10. ChatGLM-6b-int4(origin)/README.md +0 -111
  11. ChatGLM-6b-int4(origin)/tokenizer_config.json +0 -20
  12. ChatGLM-6b-int4(origin)/LICENSE → LICENSE +0 -0
  13. ChatGLM-6b-int4(origin)/MODEL_LICENSE → MODEL_LICENSE +0 -0
  14. README.md +59 -93
  15. ChatGLM-6b-int4(origin)/configuration_chatglm.py → configuration_chatglm.py +0 -0
  16. demo/.gitattributes +10 -0
  17. demo/README.md +145 -0
  18. {ChatGLM-6b-int4(origin) → demo}/config.json +0 -0
  19. flax_model.msgpack → demo/flax_model.msgpack +0 -0
  20. merges.txt → demo/merges.txt +0 -0
  21. model.safetensors → demo/model.safetensors +0 -0
  22. {ChatGLM-6b-int4(origin) → demo}/pytorch_model.bin +2 -2
  23. rust_model.ot → demo/rust_model.ot +0 -0
  24. special_tokens_map.json → demo/special_tokens_map.json +0 -0
  25. tf_model.h5 → demo/tf_model.h5 +0 -0
  26. demo/tokenizer_config.json +2 -0
  27. vocab.json → demo/vocab.json +0 -0
  28. ChatGLM-6b-int4(origin)/demo_api.py → demo_api.py +0 -0
  29. ChatGLM-6b-int4(origin)/ice_text.model → ice_text.model +0 -0
  30. ChatGLM-6b-int4(origin)/modeling_chatglm.py → modeling_chatglm.py +0 -0
  31. pytorch_model.bin +2 -2
  32. ChatGLM-6b-int4(origin)/quantization.py → quantization.py +0 -0
  33. ChatGLM-6b-int4(origin)/quantization_kernels.c → quantization_kernels.c +0 -0
  34. ChatGLM-6b-int4(origin)/quantization_kernels_parallel.c → quantization_kernels_parallel.c +0 -0
  35. ChatGLM-6b-int4(origin)/tokenization_chatglm.py → tokenization_chatglm.py +0 -0
  36. tokenizer_config.json +20 -1
.gitattributes CHANGED
@@ -1,10 +1,34 @@
- *.bin.* filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tar.gz filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
- model.safetensors filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
  *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
  *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
  *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
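This hunk swaps the repo's trimmed-down LFS rules for the standard Hugging Face default patterns. As a rough illustration (not part of the commit), the sketch below approximates which file names the new rules would route through Git LFS; note that Python's `fnmatch` only approximates git's wildmatch semantics, and `LFS_PATTERNS` is an abbreviated subset of the rules added above.

```python
# Hedged sketch: approximate LFS routing under the new .gitattributes rules.
# fnmatch is only an approximation of git's wildmatch (no **/ directory logic),
# and LFS_PATTERNS is an abbreviated subset of the patterns in this hunk.
from fnmatch import fnmatch

LFS_PATTERNS = ["*.7z", "*.arrow", "*.bin", "*.ckpt", "*.h5", "*.model",
                "*.msgpack", "*.onnx", "*.ot", "*.pt", "*.pth",
                "*.safetensors", "*.zip", "*tfevents*"]

def is_lfs_tracked(path: str) -> bool:
    """True if any LFS pattern matches the file's base name."""
    name = path.rsplit("/", 1)[-1]
    return any(fnmatch(name, pat) for pat in LFS_PATTERNS)

for p in ["pytorch_model.bin", "ice_text.model", "README.md"]:
    print(p, is_lfs_tracked(p))  # True, True, False
```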
ChatGLM-6b-int4(origin)/.gitattributes DELETED
@@ -1,34 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
ChatGLM-6b-int4(origin)/.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
+ # Default ignored files
+ /shelf/
+ /workspace.xml
+ # Editor-based HTTP Client requests
+ /httpRequests/
+ # Datasource local storage ignored files
+ /dataSources/
+ /dataSources.local.xml
ChatGLM-6b-int4(origin)/.idea/ChatGLM-6b-int4(origin).iml ADDED
@@ -0,0 +1,12 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <module type="PYTHON_MODULE" version="4">
+   <component name="NewModuleRootManager">
+     <content url="file://$MODULE_DIR$" />
+     <orderEntry type="inheritedJdk" />
+     <orderEntry type="sourceFolder" forTests="false" />
+   </component>
+   <component name="PyDocumentationSettings">
+     <option name="format" value="PLAIN" />
+     <option name="myDocStringFormat" value="Plain" />
+   </component>
+ </module>
ChatGLM-6b-int4(origin)/.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,5 @@
+ <component name="InspectionProjectProfileManager">
+   <profile version="1.0">
+     <option name="myName" value="Project Default" />
+   </profile>
+ </component>
ChatGLM-6b-int4(origin)/.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
+ <component name="InspectionProjectProfileManager">
+   <settings>
+     <option name="USE_PROJECT_PROFILE" value="false" />
+     <version value="1.0" />
+   </settings>
+ </component>
ChatGLM-6b-int4(origin)/.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="MarkdownSettingsMigration">
+     <option name="stateVersion" value="1" />
+   </component>
+   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8" project-jdk-type="Python SDK" />
+ </project>
ChatGLM-6b-int4(origin)/.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="ProjectModuleManager">
+     <modules>
+       <module fileurl="file://$PROJECT_DIR$/.idea/ChatGLM-6b-int4(origin).iml" filepath="$PROJECT_DIR$/.idea/ChatGLM-6b-int4(origin).iml" />
+     </modules>
+   </component>
+ </project>
ChatGLM-6b-int4(origin)/.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
+ <?xml version="1.0" encoding="UTF-8"?>
+ <project version="4">
+   <component name="VcsDirectoryMappings">
+     <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
+   </component>
+ </project>
ChatGLM-6b-int4(origin)/README.md DELETED
@@ -1,111 +0,0 @@
- ---
- language: en
- license: cc-by-4.0
- datasets:
- - squad_v2
- model-index:
- - name: deepset/roberta-base-squad2
-   results:
-   - task:
-       type: question-answering
-       name: Question Answering
-     dataset:
-       name: squad_v2
-       type: squad_v2
-       config: squad_v2
-       split: validation
-     metrics:
-     - type: exact_match
-       value: 79.9309
-       name: Exact Match
-       verified: true
-       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMDhhNjg5YzNiZGQ1YTIyYTAwZGUwOWEzZTRiYzdjM2QzYjA3ZTUxNDM1NjE1MTUyMjE1MGY1YzEzMjRjYzVjYiIsInZlcnNpb24iOjF9.EH5JJo8EEFwU7osPz3s7qanw_tigeCFhCXjSfyN0Y1nWVnSfulSxIk_DbAEI5iE80V4EKLyp5-mYFodWvL2KDA
-     - type: f1
-       value: 82.9501
-       name: F1
-       verified: true
-       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMjk5ZDYwOGQyNjNkMWI0OTE4YzRmOTlkY2JjNjQ0YTZkNTMzMzNkYTA0MDFmNmI3NjA3NjNlMjhiMDQ2ZjJjNSIsInZlcnNpb24iOjF9.DDm0LNTkdLbGsue58bg1aH_s67KfbcmkvL-6ZiI2s8IoxhHJMSf29H_uV2YLyevwx900t-MwTVOW3qfFnMMEAQ
-     - type: total
-       value: 11869
-       name: total
-       verified: true
-       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMGFkMmI2ODM0NmY5NGNkNmUxYWViOWYxZDNkY2EzYWFmOWI4N2VhYzY5MGEzMTVhOTU4Zjc4YWViOGNjOWJjMCIsInZlcnNpb24iOjF9.fexrU1icJK5_MiifBtZWkeUvpmFISqBLDXSQJ8E6UnrRof-7cU0s4tX_dIsauHWtUpIHMPZCf5dlMWQKXZuAAA
-
- ---
-
-
-
-
-
- # ChatGLM-6B-INT4
- <p align="center">
-   👋 Join our <a href="https://join.slack.com/t/chatglm/shared_invite/zt-1th2q5u69-7tURzFuOPanmuHy9hsZnKA" target="_blank">Slack</a> and <a href="https://github.com/THUDM/ChatGLM-6B/blob/main/resources/WECHAT.md" target="_blank">WeChat</a>
- </p>
-
- ## Introduction
- ChatGLM-6B is an open-source dialogue language model supporting bilingual (Chinese and English) question answering, based on the [General Language Model (GLM)](https://github.com/THUDM/GLM) architecture with 6.2 billion parameters. Combined with model quantization, it can be deployed locally on consumer-grade graphics cards (as little as 6 GB of VRAM at the INT4 quantization level). ChatGLM-6B uses the same technology as [ChatGLM](https://chatglm.cn) and is optimized for Chinese QA and dialogue. After bilingual training on roughly 1T tokens, reinforced by supervised fine-tuning, feedback bootstrapping, and reinforcement learning from human feedback, the 6.2-billion-parameter ChatGLM-6B can already generate answers that align well with human preferences.
-
- ChatGLM-6B-INT4 is the set of quantized weights of ChatGLM-6B. Specifically, ChatGLM-6B-INT4 applies INT4 quantization to the 28 GLM Blocks in ChatGLM-6B, leaving the Embedding and LM Head unquantized. In theory, the quantized model needs only 6 GB of VRAM (or RAM, when running on CPU) for inference, which makes it feasible to run on embedded devices such as a Raspberry Pi.
-
- When running on CPU, a CPU kernel is compiled automatically for your hardware. Please make sure GCC and OpenMP are installed (usually preinstalled on Linux; on Windows they must be installed manually) to get the best parallel performance.
-
- ## Dependencies
-
- ```shell
- pip install protobuf transformers==4.27.1 cpm_kernels
- ```
-
- ## Usage
-
- The ChatGLM-6B model can be called to generate a conversation with the following code:
-
- ```ipython
- >>> from transformers import AutoTokenizer, AutoModel
- >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
- >>> model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
- >>> response, history = model.chat(tokenizer, "你好", history=[])
- >>> print(response)
- 你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。
- >>> response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history)
- >>> print(response)
- 晚上睡不着可能会让你感到焦虑或不舒服,但以下是一些可以帮助你入睡的方法:
-
- 1. 制定规律的睡眠时间表:保持规律的睡眠时间表可以帮助你建立健康的睡眠习惯,使你更容易入睡。尽量在每天的相同时间上床,并在同一时间起床。
- 2. 创造一个舒适的睡眠环境:确保睡眠环境舒适,安静,黑暗且温度适宜。可以使用舒适的床上用品,并保持房间通风。
- 3. 放松身心:在睡前做些放松的活动,例如泡个热水澡,听些轻柔的音乐,阅读一些有趣的书籍等,有助于缓解紧张和焦虑,使你更容易入睡。
- 4. 避免饮用含有咖啡因的饮料:咖啡因是一种刺激性物质,会影响你的睡眠质量。尽量避免在睡前饮用含有咖啡因的饮料,例如咖啡,茶和可乐。
- 5. 避免在床上做与睡眠无关的事情:在床上做些与睡眠无关的事情,例如看电影,玩游戏或工作等,可能会干扰你的睡眠。
- 6. 尝试呼吸技巧:深呼吸是一种放松技巧,可以帮助你缓解紧张和焦虑,使你更容易入睡。试着慢慢吸气,保持几秒钟,然后缓慢呼气。
-
- 如果这些方法无法帮助你入睡,你可以考虑咨询医生或睡眠专家,寻求进一步的建议。
- ```
-
- For more usage instructions, including how to run the command-line and web demos and how to use model quantization to save VRAM, please refer to our [Github Repo](https://github.com/THUDM/ChatGLM-6B).
-
- ## License
-
- The code in this repository is open-sourced under the [Apache-2.0](LICENSE) license; use of the ChatGLM-6B model weights must follow the [Model License](MODEL_LICENSE).
-
- ## Citation
-
- If you find our work helpful, please consider citing the following papers:
-
- ```
- @inproceedings{
-   zeng2023glm-130b,
-   title={{GLM}-130B: An Open Bilingual Pre-trained Model},
-   author={Aohan Zeng and Xiao Liu and Zhengxiao Du and Zihan Wang and Hanyu Lai and Ming Ding and Zhuoyi Yang and Yifan Xu and Wendi Zheng and Xiao Xia and Weng Lam Tam and Zixuan Ma and Yufei Xue and Jidong Zhai and Wenguang Chen and Zhiyuan Liu and Peng Zhang and Yuxiao Dong and Jie Tang},
-   booktitle={The Eleventh International Conference on Learning Representations (ICLR)},
-   year={2023},
-   url={https://openreview.net/forum?id=-Aw0rrrPUF}
- }
- ```
- ```
- @inproceedings{du2022glm,
-   title={GLM: General Language Model Pretraining with Autoregressive Blank Infilling},
-   author={Du, Zhengxiao and Qian, Yujie and Liu, Xiao and Ding, Ming and Qiu, Jiezhong and Yang, Zhilin and Tang, Jie},
-   booktitle={Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
-   pages={320--335},
-   year={2022}
- }
- ```
ChatGLM-6b-int4(origin)/tokenizer_config.json DELETED
@@ -1,20 +0,0 @@
- {
-   "name_or_path": "THUDM/chatglm-6b-int4",
-   "bos_token": "<sop>",
-   "eos_token": "<eop>",
-   "end_token": "</s>",
-   "gmask_token": "[gMASK]",
-   "mask_token": "[MASK]",
-   "pad_token": "<pad>",
-   "unk_token": "<unk>",
-   "remove_space": false,
-   "do_lower_case": false,
-   "tokenizer_class": "ChatGLMTokenizer",
-   "num_image_tokens": 0,
-   "auto_map": {
-     "AutoTokenizer": [
-       "tokenization_chatglm.ChatGLMTokenizer",
-       null
-     ]
-   }
- }
ChatGLM-6b-int4(origin)/LICENSE → LICENSE RENAMED
File without changes
ChatGLM-6b-int4(origin)/MODEL_LICENSE → MODEL_LICENSE RENAMED
File without changes
README.md CHANGED
@@ -30,116 +30,82 @@ model-index:
  name: total
  verified: true
  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMGFkMmI2ODM0NmY5NGNkNmUxYWViOWYxZDNkY2EzYWFmOWI4N2VhYzY5MGEzMTVhOTU4Zjc4YWViOGNjOWJjMCIsInZlcnNpb24iOjF9.fexrU1icJK5_MiifBtZWkeUvpmFISqBLDXSQJ8E6UnrRof-7cU0s4tX_dIsauHWtUpIHMPZCf5dlMWQKXZuAAA
  ---

- # roberta-base for QA

- This is the [roberta-base](https://huggingface.co/roberta-base) model, fine-tuned using the [SQuAD2.0](https://huggingface.co/datasets/squad_v2) dataset. It's been trained on question-answer pairs, including unanswerable questions, for the task of Question Answering.

- ## Overview
- **Language model:** roberta-base
- **Language:** English
- **Downstream-task:** Extractive QA
- **Training data:** SQuAD 2.0
- **Eval data:** SQuAD 2.0
- **Code:** See [an example QA pipeline on Haystack](https://haystack.deepset.ai/tutorials/first-qa-system)
- **Infrastructure**: 4x Tesla v100

- ## Hyperparameters

- ```
- batch_size = 96
- n_epochs = 2
- base_LM_model = "roberta-base"
- max_seq_len = 386
- learning_rate = 3e-5
- lr_schedule = LinearWarmup
- warmup_proportion = 0.2
- doc_stride=128
- max_query_length=64
- ```
-
- ## Using a distilled model instead
- Please note that we have also released a distilled version of this model called [deepset/tinyroberta-squad2](https://huggingface.co/deepset/tinyroberta-squad2). The distilled model has a comparable prediction quality and runs at twice the speed of the base model.
-
- ## Usage
-
- ### In Haystack
- Haystack is an NLP framework by deepset. You can use this model in a Haystack pipeline to do question answering at scale (over many documents). To load the model in [Haystack](https://github.com/deepset-ai/haystack/):
- ```python
- reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
- # or
- reader = TransformersReader(model_name_or_path="deepset/roberta-base-squad2",tokenizer="deepset/roberta-base-squad2")
- ```
- For a complete example of ``roberta-base-squad2`` being used for Question Answering, check out the [Tutorials in Haystack Documentation](https://haystack.deepset.ai/tutorials/first-qa-system)

- ### In Transformers
- ```python
- from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

- model_name = "deepset/roberta-base-squad2"

- # a) Get predictions
- nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
- QA_input = {
-     'question': 'Why is model conversion important?',
-     'context': 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'
- }
- res = nlp(QA_input)

- # b) Load model & tokenizer
- model = AutoModelForQuestionAnswering.from_pretrained(model_name)
- tokenizer = AutoTokenizer.from_pretrained(model_name)
  ```

- ## Performance
- Evaluated on the SQuAD 2.0 dev set with the [official eval script](https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/).
-
- ```
- "exact": 79.87029394424324,
- "f1": 82.91251169582613,
-
- "total": 11873,
- "HasAns_exact": 77.93522267206478,
- "HasAns_f1": 84.02838248389763,
- "HasAns_total": 5928,
- "NoAns_exact": 81.79983179142137,
- "NoAns_f1": 81.79983179142137,
- "NoAns_total": 5945
  ```

- ## Authors
- **Branden Chan:** branden.chan@deepset.ai
- **Timo Möller:** timo.moeller@deepset.ai
- **Malte Pietsch:** malte.pietsch@deepset.ai
- **Tanay Soni:** tanay.soni@deepset.ai
-
- ## About us
-
- <div class="grid lg:grid-cols-2 gap-x-4 gap-y-3">
-   <div class="w-full h-40 object-cover mb-2 rounded-lg flex items-center justify-center">
-     <img alt="" src="https://raw.githubusercontent.com/deepset-ai/.github/main/deepset-logo-colored.png" class="w-40"/>
-   </div>
-   <div class="w-full h-40 object-cover mb-2 rounded-lg flex items-center justify-center">
-     <img alt="" src="https://raw.githubusercontent.com/deepset-ai/.github/main/haystack-logo-colored.png" class="w-40"/>
-   </div>
- </div>
-
- [deepset](http://deepset.ai/) is the company behind the open-source NLP framework [Haystack](https://haystack.deepset.ai/) which is designed to help you build production ready NLP systems that use: Question answering, summarization, ranking etc.
-
- Some of our other work:
- - [Distilled roberta-base-squad2 (aka "tinyroberta-squad2")](https://huggingface.co/deepset/tinyroberta-squad2)
- - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert)
- - [GermanQuAD and GermanDPR datasets and models (aka "gelectra-base-germanquad", "gbert-base-germandpr")](https://deepset.ai/germanquad)
-
- ## Get in touch and join the Haystack community
-
- <p>For more info on Haystack, visit our <strong><a href="https://github.com/deepset-ai/haystack">GitHub</a></strong> repo and <strong><a href="https://docs.haystack.deepset.ai">Documentation</a></strong>.
-
- We also have a <strong><a class="h-7" href="https://haystack.deepset.ai/community">Discord community open to everyone!</a></strong></p>
-
- [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Discord](https://haystack.deepset.ai/community) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)
-
- By the way: [we're hiring!](http://www.deepset.ai/jobs)
  name: total
  verified: true
  verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMGFkMmI2ODM0NmY5NGNkNmUxYWViOWYxZDNkY2EzYWFmOWI4N2VhYzY5MGEzMTVhOTU4Zjc4YWViOGNjOWJjMCIsInZlcnNpb24iOjF9.fexrU1icJK5_MiifBtZWkeUvpmFISqBLDXSQJ8E6UnrRof-7cU0s4tX_dIsauHWtUpIHMPZCf5dlMWQKXZuAAA
+
  ---

+ # ChatGLM-6B-INT4
+ <p align="center">
+   👋 Join our <a href="https://join.slack.com/t/chatglm/shared_invite/zt-1th2q5u69-7tURzFuOPanmuHy9hsZnKA" target="_blank">Slack</a> and <a href="https://github.com/THUDM/ChatGLM-6B/blob/main/resources/WECHAT.md" target="_blank">WeChat</a>
+ </p>

+ ## Introduction
+ ChatGLM-6B is an open-source dialogue language model supporting bilingual (Chinese and English) question answering, based on the [General Language Model (GLM)](https://github.com/THUDM/GLM) architecture with 6.2 billion parameters. Combined with model quantization, it can be deployed locally on consumer-grade graphics cards (as little as 6 GB of VRAM at the INT4 quantization level). ChatGLM-6B uses the same technology as [ChatGLM](https://chatglm.cn) and is optimized for Chinese QA and dialogue. After bilingual training on roughly 1T tokens, reinforced by supervised fine-tuning, feedback bootstrapping, and reinforcement learning from human feedback, the 6.2-billion-parameter ChatGLM-6B can already generate answers that align well with human preferences.

+ ChatGLM-6B-INT4 is the set of quantized weights of ChatGLM-6B. Specifically, ChatGLM-6B-INT4 applies INT4 quantization to the 28 GLM Blocks in ChatGLM-6B, leaving the Embedding and LM Head unquantized. In theory, the quantized model needs only 6 GB of VRAM (or RAM, when running on CPU) for inference, which makes it feasible to run on embedded devices such as a Raspberry Pi.

+ When running on CPU, a CPU kernel is compiled automatically for your hardware. Please make sure GCC and OpenMP are installed (usually preinstalled on Linux; on Windows they must be installed manually) to get the best parallel performance.

+ ## Dependencies

+ ```shell
+ pip install protobuf transformers==4.27.1 cpm_kernels
  ```

+ ## Usage
+
+ The ChatGLM-6B model can be called to generate a conversation with the following code:
+
+ ```ipython
+ >>> from transformers import AutoTokenizer, AutoModel
+ >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
+ >>> model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).half().cuda()
+ >>> response, history = model.chat(tokenizer, "你好", history=[])
+ >>> print(response)
+ 你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。
+ >>> response, history = model.chat(tokenizer, "晚上睡不着应该怎么办", history=history)
+ >>> print(response)
+ 晚上睡不着可能会让你感到焦虑或不舒服,但以下是一些可以帮助你入睡的方法:
+
+ 1. 制定规律的睡眠时间表:保持规律的睡眠时间表可以帮助你建立健康的睡眠习惯,使你更容易入睡。尽量在每天的相同时间上床,并在同一时间起床。
+ 2. 创造一个舒适的睡眠环境:确保睡眠环境舒适,安静,黑暗且温度适宜。可以使用舒适的床上用品,并保持房间通风。
+ 3. 放松身心:在睡前做些放松的活动,例如泡个热水澡,听些轻柔的音乐,阅读一些有趣的书籍等,有助于缓解紧张和焦虑,使你更容易入睡。
+ 4. 避免饮用含有咖啡因的饮料:咖啡因是一种刺激性物质,会影响你的睡眠质量。尽量避免在睡前饮用含有咖啡因的饮料,例如咖啡,茶和可乐。
+ 5. 避免在床上做与睡眠无关的事情:在床上做些与睡眠无关的事情,例如看电影,玩游戏或工作等,可能会干扰你的睡眠。
+ 6. 尝试呼吸技巧:深呼吸是一种放松技巧,可以帮助你缓解紧张和焦虑,使你更容易入睡。试着慢慢吸气,保持几秒钟,然后缓慢呼气。
+
+ 如果这些方法无法帮助你入睡,你可以考虑咨询医生或睡眠专家,寻求进一步的建议。
  ```

+ For more usage instructions, including how to run the command-line and web demos and how to use model quantization to save VRAM, please refer to our [Github Repo](https://github.com/THUDM/ChatGLM-6B).

+ ## License

+ The code in this repository is open-sourced under the [Apache-2.0](LICENSE) license; use of the ChatGLM-6B model weights must follow the [Model License](MODEL_LICENSE).

+ ## Citation

+ If you find our work helpful, please consider citing the following papers:

+ ```
+ @inproceedings{
+   zeng2023glm-130b,
+   title={{GLM}-130B: An Open Bilingual Pre-trained Model},
+   author={Aohan Zeng and Xiao Liu and Zhengxiao Du and Zihan Wang and Hanyu Lai and Ming Ding and Zhuoyi Yang and Yifan Xu and Wendi Zheng and Xiao Xia and Weng Lam Tam and Zixuan Ma and Yufei Xue and Jidong Zhai and Wenguang Chen and Zhiyuan Liu and Peng Zhang and Yuxiao Dong and Jie Tang},
+   booktitle={The Eleventh International Conference on Learning Representations (ICLR)},
+   year={2023},
+   url={https://openreview.net/forum?id=-Aw0rrrPUF}
+ }
+ ```
+ ```
+ @inproceedings{du2022glm,
+   title={GLM: General Language Model Pretraining with Autoregressive Blank Infilling},
+   author={Du, Zhengxiao and Qian, Yujie and Liu, Xiao and Ding, Ming and Qiu, Jiezhong and Yang, Zhilin and Tang, Jie},
+   booktitle={Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
+   pages={320--335},
+   year={2022}
+ }
+ ```
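The new README loads the checkpoint with `.half().cuda()`, while its introduction notes that the INT4 model can also run on CPU with an auto-compiled kernel. The following is a hedged sketch of that CPU path, which is not shown verbatim in this commit; the `.float()` pattern is taken from the upstream ChatGLM-6B documentation.

```python
# Sketch of CPU-only inference for the INT4 checkpoint, assuming the .float()
# pattern from the upstream ChatGLM-6B docs; the quantization kernels are
# compiled on first load (GCC and OpenMP must be installed).
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True)
model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).float()
model = model.eval()

response, history = model.chat(tokenizer, "你好", history=[])
print(response)
```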
ChatGLM-6b-int4(origin)/configuration_chatglm.py → configuration_chatglm.py RENAMED
File without changes
demo/.gitattributes ADDED
@@ -0,0 +1,10 @@
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
demo/README.md ADDED
@@ -0,0 +1,145 @@
+ ---
+ language: en
+ license: cc-by-4.0
+ datasets:
+ - squad_v2
+ model-index:
+ - name: deepset/roberta-base-squad2
+   results:
+   - task:
+       type: question-answering
+       name: Question Answering
+     dataset:
+       name: squad_v2
+       type: squad_v2
+       config: squad_v2
+       split: validation
+     metrics:
+     - type: exact_match
+       value: 79.9309
+       name: Exact Match
+       verified: true
+       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMDhhNjg5YzNiZGQ1YTIyYTAwZGUwOWEzZTRiYzdjM2QzYjA3ZTUxNDM1NjE1MTUyMjE1MGY1YzEzMjRjYzVjYiIsInZlcnNpb24iOjF9.EH5JJo8EEFwU7osPz3s7qanw_tigeCFhCXjSfyN0Y1nWVnSfulSxIk_DbAEI5iE80V4EKLyp5-mYFodWvL2KDA
+     - type: f1
+       value: 82.9501
+       name: F1
+       verified: true
+       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMjk5ZDYwOGQyNjNkMWI0OTE4YzRmOTlkY2JjNjQ0YTZkNTMzMzNkYTA0MDFmNmI3NjA3NjNlMjhiMDQ2ZjJjNSIsInZlcnNpb24iOjF9.DDm0LNTkdLbGsue58bg1aH_s67KfbcmkvL-6ZiI2s8IoxhHJMSf29H_uV2YLyevwx900t-MwTVOW3qfFnMMEAQ
+     - type: total
+       value: 11869
+       name: total
+       verified: true
+       verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMGFkMmI2ODM0NmY5NGNkNmUxYWViOWYxZDNkY2EzYWFmOWI4N2VhYzY5MGEzMTVhOTU4Zjc4YWViOGNjOWJjMCIsInZlcnNpb24iOjF9.fexrU1icJK5_MiifBtZWkeUvpmFISqBLDXSQJ8E6UnrRof-7cU0s4tX_dIsauHWtUpIHMPZCf5dlMWQKXZuAAA
+ ---
+
+ # roberta-base for QA
+
+ This is the [roberta-base](https://huggingface.co/roberta-base) model, fine-tuned using the [SQuAD2.0](https://huggingface.co/datasets/squad_v2) dataset. It's been trained on question-answer pairs, including unanswerable questions, for the task of Question Answering.
+
+ ## Overview
+ **Language model:** roberta-base
+ **Language:** English
+ **Downstream-task:** Extractive QA
+ **Training data:** SQuAD 2.0
+ **Eval data:** SQuAD 2.0
+ **Code:** See [an example QA pipeline on Haystack](https://haystack.deepset.ai/tutorials/first-qa-system)
+ **Infrastructure**: 4x Tesla v100
+
+ ## Hyperparameters
+
+ ```
+ batch_size = 96
+ n_epochs = 2
+ base_LM_model = "roberta-base"
+ max_seq_len = 386
+ learning_rate = 3e-5
+ lr_schedule = LinearWarmup
+ warmup_proportion = 0.2
+ doc_stride=128
+ max_query_length=64
+ ```
+
+ ## Using a distilled model instead
+ Please note that we have also released a distilled version of this model called [deepset/tinyroberta-squad2](https://huggingface.co/deepset/tinyroberta-squad2). The distilled model has a comparable prediction quality and runs at twice the speed of the base model.
+
+ ## Usage
+
+ ### In Haystack
+ Haystack is an NLP framework by deepset. You can use this model in a Haystack pipeline to do question answering at scale (over many documents). To load the model in [Haystack](https://github.com/deepset-ai/haystack/):
+ ```python
+ reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
+ # or
+ reader = TransformersReader(model_name_or_path="deepset/roberta-base-squad2",tokenizer="deepset/roberta-base-squad2")
+ ```
+ For a complete example of ``roberta-base-squad2`` being used for Question Answering, check out the [Tutorials in Haystack Documentation](https://haystack.deepset.ai/tutorials/first-qa-system)
+
+ ### In Transformers
+ ```python
+ from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
+
+ model_name = "deepset/roberta-base-squad2"
+
+ # a) Get predictions
+ nlp = pipeline('question-answering', model=model_name, tokenizer=model_name)
+ QA_input = {
+     'question': 'Why is model conversion important?',
+     'context': 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'
+ }
+ res = nlp(QA_input)
+
+ # b) Load model & tokenizer
+ model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ ```
+
+ ## Performance
+ Evaluated on the SQuAD 2.0 dev set with the [official eval script](https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/).
+
+ ```
+ "exact": 79.87029394424324,
+ "f1": 82.91251169582613,
+
+ "total": 11873,
+ "HasAns_exact": 77.93522267206478,
+ "HasAns_f1": 84.02838248389763,
+ "HasAns_total": 5928,
+ "NoAns_exact": 81.79983179142137,
+ "NoAns_f1": 81.79983179142137,
+ "NoAns_total": 5945
+ ```
+
+ ## Authors
+ **Branden Chan:** branden.chan@deepset.ai
+ **Timo Möller:** timo.moeller@deepset.ai
+ **Malte Pietsch:** malte.pietsch@deepset.ai
+ **Tanay Soni:** tanay.soni@deepset.ai
+
+ ## About us
+
+ <div class="grid lg:grid-cols-2 gap-x-4 gap-y-3">
+   <div class="w-full h-40 object-cover mb-2 rounded-lg flex items-center justify-center">
+     <img alt="" src="https://raw.githubusercontent.com/deepset-ai/.github/main/deepset-logo-colored.png" class="w-40"/>
+   </div>
+   <div class="w-full h-40 object-cover mb-2 rounded-lg flex items-center justify-center">
+     <img alt="" src="https://raw.githubusercontent.com/deepset-ai/.github/main/haystack-logo-colored.png" class="w-40"/>
+   </div>
+ </div>
+
+ [deepset](http://deepset.ai/) is the company behind the open-source NLP framework [Haystack](https://haystack.deepset.ai/) which is designed to help you build production ready NLP systems that use: Question answering, summarization, ranking etc.
+
+ Some of our other work:
+ - [Distilled roberta-base-squad2 (aka "tinyroberta-squad2")](https://huggingface.co/deepset/tinyroberta-squad2)
+ - [German BERT (aka "bert-base-german-cased")](https://deepset.ai/german-bert)
+ - [GermanQuAD and GermanDPR datasets and models (aka "gelectra-base-germanquad", "gbert-base-germandpr")](https://deepset.ai/germanquad)
+
+ ## Get in touch and join the Haystack community
+
+ <p>For more info on Haystack, visit our <strong><a href="https://github.com/deepset-ai/haystack">GitHub</a></strong> repo and <strong><a href="https://docs.haystack.deepset.ai">Documentation</a></strong>.
+
+ We also have a <strong><a class="h-7" href="https://haystack.deepset.ai/community">Discord community open to everyone!</a></strong></p>
+
+ [Twitter](https://twitter.com/deepset_ai) | [LinkedIn](https://www.linkedin.com/company/deepset-ai/) | [Discord](https://haystack.deepset.ai/community) | [GitHub Discussions](https://github.com/deepset-ai/haystack/discussions) | [Website](https://deepset.ai)
+
+ By the way: [we're hiring!](http://www.deepset.ai/jobs)
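This README's Transformers example only shows the happy path. Since the model was trained on SQuAD 2.0, which contains unanswerable questions, here is a small companion sketch (not part of the commit) that lets the pipeline return "no answer" as well:

```python
# Hedged companion to the README's pipeline example: SQuAD 2.0 models can
# predict "no answer"; handle_impossible_answer lets the pipeline return it.
from transformers import pipeline

nlp = pipeline("question-answering",
               model="deepset/roberta-base-squad2",
               tokenizer="deepset/roberta-base-squad2")

res = nlp(
    question="What color is the sky?",
    context="Model conversion gives users the freedom to switch frameworks.",
    handle_impossible_answer=True,  # allow the empty answer as a candidate
)
print(res)  # an empty 'answer' string here means "unanswerable"
```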
{ChatGLM-6b-int4(origin) → demo}/config.json RENAMED
File without changes
flax_model.msgpack → demo/flax_model.msgpack RENAMED
File without changes
merges.txt → demo/merges.txt RENAMED
File without changes
model.safetensors → demo/model.safetensors RENAMED
File without changes
{ChatGLM-6b-int4(origin) → demo}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:35828b49cf23cbae4c27788d4b04fc68c79a276300e09f14d72a49b0b738b4a9
- size 3893083075
  version https://git-lfs.github.com/spec/v1
+ oid sha256:e0b64ccefc1bcb569b604baea27eb873e5482fdf6eb3ceff1fb5368397db5aed
+ size 496313727
rust_model.ot → demo/rust_model.ot RENAMED
File without changes
special_tokens_map.json → demo/special_tokens_map.json RENAMED
File without changes
tf_model.h5 → demo/tf_model.h5 RENAMED
File without changes
demo/tokenizer_config.json ADDED
@@ -0,0 +1,2 @@
+ {"do_lower_case": false, "model_max_length": 512, "full_tokenizer_file": null}
+
vocab.json → demo/vocab.json RENAMED
File without changes
ChatGLM-6b-int4(origin)/demo_api.py → demo_api.py RENAMED
File without changes
ChatGLM-6b-int4(origin)/ice_text.model → ice_text.model RENAMED
File without changes
ChatGLM-6b-int4(origin)/modeling_chatglm.py → modeling_chatglm.py RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e0b64ccefc1bcb569b604baea27eb873e5482fdf6eb3ceff1fb5368397db5aed
- size 496313727
  version https://git-lfs.github.com/spec/v1
+ oid sha256:35828b49cf23cbae4c27788d4b04fc68c79a276300e09f14d72a49b0b738b4a9
+ size 3893083075
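Both `pytorch_model.bin` hunks in this commit edit Git LFS pointer files rather than the weights themselves: a pointer stores only the spec version, a `sha256` oid, and the byte size. A minimal sketch (assuming the `key value` pointer format shown above) for parsing a pointer and verifying a downloaded blob against it:

```python
# Minimal sketch: parse an LFS pointer like the ones above and verify a blob.
import hashlib
from pathlib import Path

def parse_lfs_pointer(text: str) -> dict:
    """Split 'key value' lines (version, oid, size) into a dict."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    fields["size"] = int(fields["size"])
    return fields

def verify_blob(pointer: dict, blob: Path) -> bool:
    """Check a downloaded file's size and sha256 against its pointer."""
    data = blob.read_bytes()
    digest = "sha256:" + hashlib.sha256(data).hexdigest()
    return len(data) == pointer["size"] and digest == pointer["oid"]

ptr = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:35828b49cf23cbae4c27788d4b04fc68c79a276300e09f14d72a49b0b738b4a9\n"
    "size 3893083075"
)
print(ptr["size"])  # 3893083075 bytes: the size of the full INT4 checkpoint
```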
ChatGLM-6b-int4(origin)/quantization.py → quantization.py RENAMED
File without changes
ChatGLM-6b-int4(origin)/quantization_kernels.c → quantization_kernels.c RENAMED
File without changes
ChatGLM-6b-int4(origin)/quantization_kernels_parallel.c → quantization_kernels_parallel.c RENAMED
File without changes
ChatGLM-6b-int4(origin)/tokenization_chatglm.py → tokenization_chatglm.py RENAMED
File without changes
tokenizer_config.json CHANGED
@@ -1 +1,20 @@
- {"do_lower_case": false, "model_max_length": 512, "full_tokenizer_file": null}
+ {
+   "name_or_path": "NewBreaker/chatglm-6b-int4",
+   "bos_token": "<sop>",
+   "eos_token": "<eop>",
+   "end_token": "</s>",
+   "gmask_token": "[gMASK]",
+   "mask_token": "[MASK]",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>",
+   "remove_space": false,
+   "do_lower_case": false,
+   "tokenizer_class": "ChatGLMTokenizer",
+   "num_image_tokens": 0,
+   "auto_map": {
+     "AutoTokenizer": [
+       "tokenization_chatglm.ChatGLMTokenizer",
+       null
+     ]
+   }
+ }
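The `auto_map` entry above wires `AutoTokenizer` to the repo-local `ChatGLMTokenizer`, so loading requires `trust_remote_code=True`. A hedged sketch of what that load would look like, with the repo id taken from `name_or_path` in this config:

```python
# Sketch: AutoTokenizer resolves to tokenization_chatglm.ChatGLMTokenizer via
# the auto_map above; trust_remote_code=True permits running repo-local code.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("NewBreaker/chatglm-6b-int4",
                                          trust_remote_code=True)
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.pad_token)
# Expected from this config: <sop> <eop> <pad>
```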