Duplicate from Flmc/DISC-MedLLM
Co-authored-by: Eric <Flmc@users.noreply.huggingface.co>
- .gitattributes +35 -0
- README.md +161 -0
- config.json +28 -0
- configuration_baichuan.py +46 -0
- generation_config.json +14 -0
- generation_utils.py +82 -0
- modeling_baichuan.py +572 -0
- pytorch_model-00001-of-00003.bin +3 -0
- pytorch_model-00002-of-00003.bin +3 -0
- pytorch_model-00003-of-00003.bin +3 -0
- pytorch_model.bin.index.json +290 -0
- quantizer.py +123 -0
- special_tokens_map.json +30 -0
- tokenization_baichuan.py +232 -0
- tokenizer.model +3 -0
- tokenizer_config.json +46 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,161 @@
---
license: apache-2.0
datasets:
- Flmc/DISC-Med-SFT
language:
- zh
tags:
- medical
---
This repository contains the DISC-MedLLM version that uses Baichuan-13B-Base as the base model.

**Please note that due to the ongoing development of the project, the model weights in this repository may differ from those in our currently deployed demo.**

Check [DISC-MedLLM](https://github.com/FudanDISC/DISC-MedLLM) for more information.

# DISC-MedLLM

[**Demo**](http://med.fudan-disc.com) | [**Tech Report**](https://arxiv.org/abs/2308.14346)

This is the repo of DISC-MedLLM, a medical domain-specific LLM designed for conversational healthcare scenarios by the [Fudan-DISC](http://fudan-disc.com) lab.

The following resources have been released:
* The DISC-Med-SFT dataset (without the behavioral preference dataset)
* Model [weights](https://huggingface.co/Flmc/DISC-MedLLM) of DISC-MedLLM

You can check this [link](http://medllm.fudan-disc.com) to try our online demo.

## Overview
DISC-MedLLM is a large-scale domain-specific model designed for conversational healthcare scenarios. It can address a variety of needs, including medical consultations and treatment inquiries, offering high-quality health support services.

DISC-MedLLM effectively bridges the gap between general language models and real-world medical consultations, as evidenced by experimental results.

Owing to our goal-oriented strategy and a framework that puts both the LLM and humans in the loop, built on real-world doctor-patient dialogues and knowledge graphs, DISC-MedLLM boasts several features:

* **Knowledge-intensive and reliable**
* **Ability to conduct multi-turn inquiry**
* **Alignment with human preferences**


## Dataset
<!-- In order to align the distribution of actual doctor responses with the intended AI doctor response distribution, our dataset is constructed from five main resources: Real-world Conversations (420k), Knowledge Graph-derived Question-Answer pairs (50k), Artificially Annotated Data aligned with human preferences (2k), MedMCQA (8k), and additional general data (34k). -->

To train DISC-MedLLM, we constructed a high-quality dataset, DISC-Med-SFT, consisting of over 470k distinct examples derived from existing medical datasets. We adopted a goal-oriented strategy, selectively reconstructing the dataset from a few deliberately chosen sources. These sources help the LLM acquire medical domain knowledge, align its behavioral patterns with human preferences, and capture real-world online medical dialogue distributions.

<!-- <style type="text/css">
.tg {border-collapse:collapse;border-spacing:0;}
.tg td{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
overflow:hidden;padding:10px 5px;word-break:normal;}
.tg th{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
font-weight:normal;overflow:hidden;padding:10px 5px;word-break:normal;}
.tg .tg-9wq8{border-color:inherit;text-align:center;vertical-align:middle}
.tg .tg-c3ow{border-color:inherit;text-align:center;vertical-align:top}
</style> -->
<table class="tg" style="undefined;table-layout: fixed; width: 442px">
<colgroup>
<col style="width: 204.428571px">
<col style="width: 135.428571px">
<col style="width: 102.428571px">
</colgroup>
<thead>
<tr>
<th class="tg-9wq8">Dataset</th>
<th class="tg-9wq8">Original Source</th>
<th class="tg-9wq8">Size</th>
</tr>
</thead>
<tbody>
<tr>
<td class="tg-9wq8" rowspan="2">Re-constructed AI Doctor-Patient Dialogue</td>
<td class="tg-9wq8">MedDialog</td>
<td class="tg-9wq8">400k</td>
</tr>
<tr>
<td class="tg-9wq8">cMedQA2</td>
<td class="tg-c3ow">20k</td>
</tr>
<tr>
<td class="tg-c3ow">Knowledge Graph<br>QA pairs</td>
<td class="tg-9wq8">CMeKG</td>
<td class="tg-9wq8">50k</td>
</tr>
<tr>
<td class="tg-c3ow">Behavior Preference<br>Dataset</td>
<td class="tg-9wq8">Manual selection</td>
<td class="tg-9wq8">2k</td>
</tr>
<tr>
<td class="tg-9wq8" rowspan="3">Others</td>
<td class="tg-c3ow">MedMCQA</td>
<td class="tg-c3ow">8k</td>
</tr>
<tr>
<td class="tg-c3ow">MOSS-SFT</td>
<td class="tg-c3ow">33k</td>
</tr>
<tr>
<td class="tg-c3ow">Alpaca-GPT4-zh</td>
<td class="tg-c3ow">1k</td>
</tr>
</tbody>
</table>

<br>


## Deploy
The current version of DISC-MedLLM is derived from [Baichuan-13B-Base](https://github.com/baichuan-inc/Baichuan-13B). You can download our model weights directly from the Hugging Face [repository](https://huggingface.co/Flmc/DISC-MedLLM), or obtain them automatically through the demo code.


### Using with Hugging Face Transformers
```python
>>> import torch
>>> from transformers import AutoModelForCausalLM, AutoTokenizer
>>> from transformers.generation.utils import GenerationConfig
>>> tokenizer = AutoTokenizer.from_pretrained("Flmc/DISC-MedLLM", use_fast=False, trust_remote_code=True)
>>> model = AutoModelForCausalLM.from_pretrained("Flmc/DISC-MedLLM", device_map="auto", torch_dtype=torch.float16, trust_remote_code=True)
>>> model.generation_config = GenerationConfig.from_pretrained("Flmc/DISC-MedLLM")
>>> messages = []
>>> # "My neck feels very uncomfortable, and I wake up with a headache every day."
>>> messages.append({"role": "user", "content": "我感觉自己颈椎非常不舒服,每天睡醒都会头痛"})
>>> response = model.chat(tokenizer, messages)
>>> print(response)
```
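
The bundled modeling code also supports streaming: with `stream=True`, `chat` returns an iterator, and each step yields the full decoded text generated so far (see `TextIterStreamer` in `generation_utils.py` below), so a caller should print only the new suffix. A minimal sketch:
```python
>>> printed = 0
>>> for text in model.chat(tokenizer, messages, stream=True):
...     print(text[printed:], end="", flush=True)
...     printed = len(text)
```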


Additionally, since the current version uses Baichuan as the base model, you can refer to its [repo](https://github.com/baichuan-inc/Baichuan-13B) for deploying with int8 or int4 quantized inference. Note, however, that quantized deployment results in some performance degradation.
<br>
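
A minimal sketch of in-process quantization, using the `quantize()` helper shipped in this repository's `modeling_baichuan.py` (an illustration, not an official quantized release; load the fp16 weights first, then quantize and move to GPU):
```python
>>> model = AutoModelForCausalLM.from_pretrained("Flmc/DISC-MedLLM", torch_dtype=torch.float16, trust_remote_code=True)
>>> model = model.quantize(8).cuda()  # pass 4 for int4
```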

## Training
You can fine-tune our model using data that follows our data schema.
Our training code is derived from [Firefly](https://github.com/yangjianxin1/Firefly), with a different data schema and dialogue format. We only provide the code for full-parameter fine-tuning:
```shell
deepspeed --num_gpus={num_gpus} ./train/train.py --train_args_file ./train/train_args/sft.json
```
> Please check the setup of `sft.json` before you attempt to start training.

<br>If you want to fine-tune our model with other training code, please use the following dialogue format.
```shell
<\b><$user_token>content<$assistant_token>content<\s><$user_token>content ...
```
The `user_token` and `assistant_token` IDs we use are `195` and `196`, respectively, the same as in Baichuan-13B-Chat.
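
As an illustration, here is a minimal sketch that builds such a sequence directly from token IDs (token `2` is the `eos_token_id`, playing the role of `<\s>`; the trailing assistant token cues the model to reply, as `build_chat_input` in the bundled `generation_utils.py` does):
```python
>>> input_ids = []
>>> for turn in messages:  # [{"role": "user" or "assistant", "content": "..."}]
...     input_ids.append(195 if turn["role"] == "user" else 196)  # turn marker
...     input_ids += tokenizer.encode(turn["content"])
...     if turn["role"] == "assistant":
...         input_ids.append(2)  # eos closes an assistant turn (<\s>)
>>> input_ids.append(196)  # cue the model to answer as the assistant
```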

## Declaration
Due to the inherent limitations of language models, we cannot assure the accuracy or reliability of information generated by this model. This model is designed exclusively for research and testing by individuals and academic groups. We urge users to critically assess any information or medical advice obtained through the model's output. Blindly trusting or following such information is strongly discouraged. We disclaim responsibility for any issues, risks, or adverse consequences resulting from the model's use.

## Licenses
The use of the source code in this repository complies with the Apache 2.0 License.

## Citation
```bibtex
@misc{bao2023discmedllm,
      title={DISC-MedLLM: Bridging General Large Language Models and Real-World Medical Consultation},
      author={Zhijie Bao and Wei Chen and Shengze Xiao and Kuang Ren and Jiaao Wu and Cheng Zhong and Jiajie Peng and Xuanjing Huang and Zhongyu Wei},
      year={2023},
      eprint={2308.14346},
      archivePrefix={arXiv},
      primaryClass={cs.CL}
}
```
config.json
ADDED
@@ -0,0 +1,28 @@
{
  "_from_model_config": true,
  "_name_or_path": "/root/output/baichuan13b-sft-after300k-artificial-v2/final",
  "architectures": [
    "BaichuanForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_baichuan.BaichuanConfig",
    "AutoModelForCausalLM": "modeling_baichuan.BaichuanForCausalLM"
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 13696,
  "model_max_length": 4096,
  "model_type": "baichuan",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.31.0",
  "use_cache": true,
  "vocab_size": 64000
}
configuration_baichuan.py
ADDED
@@ -0,0 +1,46 @@
# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.

from transformers.configuration_utils import PretrainedConfig

class BaichuanConfig(PretrainedConfig):
    model_type = "baichuan"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=64000,
        hidden_size=5120,
        intermediate_size=13696,
        num_hidden_layers=40,
        num_attention_heads=40,
        hidden_act="silu",
        model_max_length=4096,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        gradient_checkpointing=False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.model_max_length = model_max_length
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        # Assign as a plain bool; a stray trailing comma here would silently
        # turn the attribute into a one-element tuple.
        self.gradient_checkpointing = gradient_checkpointing
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
generation_config.json
ADDED
@@ -0,0 +1,14 @@
{
  "assistant_token_id": 196,
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_new_tokens": 2048,
  "pad_token_id": 0,
  "repetition_penalty": 1.1,
  "temperature": 0.3,
  "top_k": 5,
  "top_p": 0.85,
  "transformers_version": "4.31.0",
  "user_token_id": 195
}
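
These sampling defaults can be overridden per call. A small sketch, assuming a model and tokenizer loaded as in the README above:
```python
from transformers.generation.utils import GenerationConfig

# Start from the shipped defaults, then adjust sampling for a single chat() call.
config = GenerationConfig.from_pretrained("Flmc/DISC-MedLLM")
config.temperature = 0.7     # shipped default is 0.3
config.max_new_tokens = 512  # shipped default is 2048
response = model.chat(tokenizer, messages, generation_config=config)
```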
generation_utils.py
ADDED
@@ -0,0 +1,82 @@
from typing import List
from queue import Queue

import torch


def build_chat_input(model, tokenizer, messages: List[dict], max_new_tokens: int=0):
    def _parse_messages(messages, split_role="user"):
        # Group messages into rounds, each starting with a `split_role` message;
        # an optional leading system message is kept separate.
        system, rounds = "", []
        round = []
        for i, message in enumerate(messages):
            if message["role"] == "system":
                assert i == 0
                system = message["content"]
                continue
            if message["role"] == split_role and round:
                rounds.append(round)
                round = []
            round.append(message)
        if round:
            rounds.append(round)
        return system, rounds

    max_new_tokens = max_new_tokens or model.generation_config.max_new_tokens
    max_input_tokens = model.config.model_max_length - max_new_tokens
    system, rounds = _parse_messages(messages, split_role="user")
    system_tokens = tokenizer.encode(system)
    max_history_tokens = max_input_tokens - len(system_tokens)

    # Walk rounds from most recent to oldest, keeping whole rounds until the
    # history budget is exhausted.
    history_tokens = []
    for round in rounds[::-1]:
        round_tokens = []
        for message in round:
            if message["role"] == "user":
                round_tokens.append(model.generation_config.user_token_id)
            else:
                round_tokens.append(model.generation_config.assistant_token_id)
            round_tokens.extend(tokenizer.encode(message["content"]))
        if len(history_tokens) == 0 or len(history_tokens) + len(round_tokens) <= max_history_tokens:
            history_tokens = round_tokens + history_tokens  # concat left
            if len(history_tokens) < max_history_tokens:
                continue
        break

    input_tokens = system_tokens + history_tokens
    if messages[-1]["role"] != "assistant":
        input_tokens.append(model.generation_config.assistant_token_id)
    input_tokens = input_tokens[-max_input_tokens:]  # truncate left
    return torch.LongTensor([input_tokens]).to(model.device)


class TextIterStreamer:
    def __init__(self, tokenizer, skip_prompt=False, skip_special_tokens=False):
        self.tokenizer = tokenizer
        self.skip_prompt = skip_prompt
        self.skip_special_tokens = skip_special_tokens
        self.tokens = []
        self.text_queue = Queue()
        self.next_tokens_are_prompt = True

    def put(self, value):
        if self.skip_prompt and self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False
        else:
            if len(value.shape) > 1:
                value = value[0]
            self.tokens.extend(value.tolist())
            # Decode all accumulated tokens; each queue item is the full text so far.
            self.text_queue.put(
                self.tokenizer.decode(self.tokens, skip_special_tokens=self.skip_special_tokens))

    def end(self):
        self.text_queue.put(None)

    def __iter__(self):
        return self

    def __next__(self):
        value = self.text_queue.get()
        if value is None:
            raise StopIteration()
        else:
            return value
modeling_baichuan.py
ADDED
@@ -0,0 +1,572 @@
# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.

import math
from threading import Thread
from typing import List, Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch.nn import CrossEntropyLoss
from transformers import PreTrainedModel
from transformers.activations import ACT2FN
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
from transformers.utils import logging
from transformers.generation.utils import GenerationConfig

from .configuration_baichuan import BaichuanConfig
from .generation_utils import build_chat_input, TextIterStreamer

logger = logging.get_logger(__name__)


def _get_interleave(n):
    # ALiBi head slopes: for n heads (n a power of two), head i gets slope
    # start * ratio**i with start = ratio = 2**(-8/n).
    def _get_interleave_power_of_2(n):
        start = (2 ** (-2 ** -(math.log2(n) - 3)))
        ratio = start
        return [start * ratio ** i for i in range(n)]

    if math.log2(n).is_integer():
        return _get_interleave_power_of_2(n)
    else:
        closest_power_of_2 = 2 ** math.floor(math.log2(n))
        return _get_interleave_power_of_2(closest_power_of_2) + \
            _get_interleave(2 * closest_power_of_2)[0::2][:n - closest_power_of_2]

def _fill_with_neg_inf(t):
    """FP16-compatible function that fills a tensor with -inf."""
    return t.float().fill_(float("-inf")).type_as(t)

def _gen_alibi_mask(n_head, max_pos):
    """used in inference only"""
    slopes = torch.Tensor(_get_interleave(n_head))
    alibi = slopes.unsqueeze(1).unsqueeze(1) * torch.arange(max_pos).unsqueeze(0).unsqueeze(0).expand(
        n_head, -1, -1)
    alibi = alibi.view(n_head, 1, max_pos)
    alibi_mask = torch.triu(
        _fill_with_neg_inf(torch.zeros([max_pos, max_pos])), 1
    )
    alibi_mask = alibi_mask.unsqueeze(0) + alibi
    return alibi_mask

def _buffered_future_mask(tensor, maxpos, alibi, attn_heads):
    """used in training only"""
    dim = tensor.size(1)
    _future_mask = torch.triu(
        _fill_with_neg_inf(torch.zeros([maxpos, maxpos])), 1
    )
    _future_mask = _future_mask.unsqueeze(0) + alibi
    _future_mask = _future_mask.to(tensor)
    return _future_mask[:tensor.shape[0] * attn_heads, :maxpos, :maxpos]


class RMSNorm(torch.nn.Module):
    def __init__(self, hidden_size, epsilon=1e-6):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.empty(hidden_size))
        self.epsilon = epsilon

    def forward(self, hidden_states):
        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.epsilon)

        # convert into half-precision
        if self.weight.dtype in [torch.float16, torch.bfloat16]:
            hidden_states = hidden_states.to(self.weight.dtype)

        return self.weight * hidden_states


class MLP(torch.nn.Module):
    def __init__(
        self,
        hidden_size: int,
        intermediate_size: int,
        hidden_act: str,
    ):
        super().__init__()
        self.gate_proj = torch.nn.Linear(hidden_size, intermediate_size, bias=False)
        self.down_proj = torch.nn.Linear(intermediate_size, hidden_size, bias=False)
        self.up_proj = torch.nn.Linear(hidden_size, intermediate_size, bias=False)
        self.act_fn = ACT2FN[hidden_act]

    def forward(self, x):
        return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))


class BaichuanAttention(torch.nn.Module):
    def __init__(self, config: BaichuanConfig):
        super().__init__()
        self.config = config
        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.head_dim = self.hidden_size // self.num_heads
        self.max_position_embeddings = config.model_max_length

        if (self.head_dim * self.num_heads) != self.hidden_size:
            raise ValueError(
                f"hidden_size {self.hidden_size} is not divisible by num_heads {self.num_heads}"
            )
        self.W_pack = torch.nn.Linear(self.hidden_size, 3 * self.hidden_size, bias=False)
        self.o_proj = torch.nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)

    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: bool = False,
        use_cache: bool = False,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:

        bsz, q_len, _ = hidden_states.size()

        # A single packed projection produces Q, K, V in one matmul.
        proj = self.W_pack(hidden_states)
        proj = proj.unflatten(-1, (3, self.hidden_size)).unsqueeze(0).transpose(0, -2).squeeze(-2)
        query_states = proj[0].view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
        key_states = proj[1].view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
        value_states = proj[2].view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)

        kv_seq_len = key_states.shape[-2]
        if past_key_value is not None:
            kv_seq_len += past_key_value[0].shape[-2]

        if past_key_value is not None:
            # reuse k, v, self_attention
            key_states = torch.cat([past_key_value[0], key_states], dim=2)
            value_states = torch.cat([past_key_value[1], value_states], dim=2)

        past_key_value = (key_states, value_states) if use_cache else None

        attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)

        if attention_mask is not None:
            if q_len == 1:  # inference with cache
                if len(attention_mask.size()) == 4:
                    attention_mask = attention_mask[:, :, -1:, :]
                else:
                    attention_mask = attention_mask[:, -1:, :]
            attn_weights = attn_weights + attention_mask
            attn_weights = torch.max(attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min))

        attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)

        attn_output = torch.matmul(attn_weights, value_states)

        attn_output = attn_output.transpose(1, 2)
        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
        attn_output = self.o_proj(attn_output)

        if not output_attentions:
            attn_weights = None

        return attn_output, attn_weights, past_key_value


class BaichuanLayer(torch.nn.Module):
    def __init__(self, config: BaichuanConfig):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.self_attn = BaichuanAttention(config=config)
        self.mlp = MLP(
            hidden_size=self.hidden_size,
            intermediate_size=config.intermediate_size,
            hidden_act=config.hidden_act,
        )
        self.input_layernorm = RMSNorm(config.hidden_size, epsilon=config.rms_norm_eps)
        self.post_attention_layernorm = RMSNorm(config.hidden_size, epsilon=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:

        residual = hidden_states

        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
        )
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)

        if use_cache:
            outputs += (present_key_value,)

        return outputs


class BaichuanPreTrainedModel(PreTrainedModel):
    config_class = BaichuanConfig
    base_model_prefix = "model"
    supports_gradient_checkpointing = True
    _no_split_modules = ["BaichuanLayer"]
    _keys_to_ignore_on_load_unexpected = [r"decoder\.version"]

    def _init_weights(self, module):
        std = self.config.initializer_range
        if isinstance(module, torch.nn.Linear):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, torch.nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()

    def _set_gradient_checkpointing(self, module, value=False):
        if isinstance(module, BaichuanModel):
            module.gradient_checkpointing = value


class BaichuanModel(BaichuanPreTrainedModel):
    def __init__(self, config: BaichuanConfig):
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size
        self.n_head = config.num_attention_heads
        self.embed_tokens = torch.nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.layers = torch.nn.ModuleList([BaichuanLayer(config) for _ in range(config.num_hidden_layers)])
        self.norm = RMSNorm(config.hidden_size, epsilon=config.rms_norm_eps)

        self.gradient_checkpointing = config.gradient_checkpointing
        self.post_init()
        self.max_cache_pos = config.model_max_length
        self.first_run = True
        self.alibi_mask = None

    def get_input_embeddings(self):
        return self.embed_tokens

    def set_input_embeddings(self, value):
        self.embed_tokens = value

    def get_alibi_mask(self, tensor, seq_length_with_past):
        if self.training:
            slopes = torch.Tensor(_get_interleave(self.n_head))
            alibi = slopes.unsqueeze(1).unsqueeze(1) * torch.arange(seq_length_with_past).unsqueeze(0).unsqueeze(0).expand(
                self.n_head,
                -1, -1)
            alibi = alibi.view(self.n_head, 1, seq_length_with_past)
            mask = _buffered_future_mask(tensor, seq_length_with_past, alibi, self.n_head)
        else:
            # At inference time, cache one large causal+ALiBi mask and slice it,
            # regenerating only when the sequence outgrows the cached size.
            if self.first_run:
                self.first_run = False
                self.register_buffer("future_mask", _gen_alibi_mask(self.n_head, self.max_cache_pos).to(tensor), persistent=False)
            if seq_length_with_past > self.max_cache_pos:
                self.max_cache_pos = seq_length_with_past
                self.register_buffer("future_mask", _gen_alibi_mask(self.n_head, self.max_cache_pos).to(tensor), persistent=False)
            mask = self.future_mask[:self.n_head, :seq_length_with_past, :seq_length_with_past]
        return mask

    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = False,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple, BaseModelOutputWithPast]:

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot provide both input_ids and inputs_embeds simultaneously")
        elif input_ids is not None:
            batch_size, seq_length = input_ids.shape
        elif inputs_embeds is not None:
            batch_size, seq_length, _ = inputs_embeds.shape
        else:
            raise ValueError("You need to provide input_ids or inputs_embeds")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        seq_length_with_past = seq_length

        if past_key_values is not None:
            past_key_values_length = past_key_values[0][0].shape[2]
            seq_length_with_past = seq_length_with_past + past_key_values_length

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if self.training:
            if self.alibi_mask is None or self.alibi_mask.shape[-1] != seq_length_with_past:
                self.alibi_mask = self.get_alibi_mask(inputs_embeds, seq_length_with_past)
            alibi_mask = self.alibi_mask
        else:
            alibi_mask = self.get_alibi_mask(inputs_embeds, seq_length_with_past)

        if attention_mask is not None:
            if len(attention_mask.shape) == 2:
                expanded_mask = attention_mask.to(alibi_mask.dtype)
                expanded_mask = torch.tril(torch.gt(expanded_mask[:, :, None] * expanded_mask[:, None, :], 0)
                                ) * torch.eq(expanded_mask[:, :, None] - expanded_mask[:, None, :], 0)
            else:
                expanded_mask = attention_mask
            bsz = inputs_embeds.size(0)
            src_len, tgt_len = alibi_mask.size()[-2:]
            expanded_mask = expanded_mask.unsqueeze(1).expand(bsz, 1, src_len, tgt_len).to(alibi_mask.dtype)
            inverted_mask = 1.0 - expanded_mask
            inverted_mask = inverted_mask.masked_fill(inverted_mask.to(torch.bool), torch.finfo(alibi_mask.dtype).min)
            attention_mask = inverted_mask + alibi_mask.unsqueeze(0)
        else:
            attention_mask = alibi_mask

        hidden_states = inputs_embeds

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = () if use_cache else None

        for idx, decoder_layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            past_key_value = past_key_values[idx] if past_key_values is not None else None

            if self.gradient_checkpointing and self.training:

                def create_custom_forward(module):
                    def custom_forward(*inputs):
                        # None for past_key_value
                        return module(*inputs, output_attentions, None)

                    return custom_forward

                layer_outputs = torch.utils.checkpoint.checkpoint(
                    create_custom_forward(decoder_layer),
                    hidden_states,
                    attention_mask,
                    None,
                )
            else:
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    past_key_value=past_key_value,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                )

            hidden_states = layer_outputs[0]

            if use_cache:
                next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = next_decoder_cache if use_cache else None
        if not return_dict:
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )


class BaichuanForCausalLM(BaichuanPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.model = BaichuanModel(config)
        self.lm_head = torch.nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        return self.model.embed_tokens

    def set_input_embeddings(self, value):
        self.model.embed_tokens = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_embeddings):
        self.lm_head = new_embeddings

    def set_decoder(self, decoder):
        self.model = decoder

    def get_decoder(self):
        return self.model

    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = False,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
        **kwargs
    ) -> Union[Tuple, CausalLMOutputWithPast]:

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        hidden_states = outputs[0]
        logits = self.lm_head(hidden_states)

        loss = None
        if labels is not None:
            # Shift so that tokens < n predict n
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = labels[..., 1:].contiguous()
            # Flatten the tokens
            loss_fct = CrossEntropyLoss()
            shift_logits = shift_logits.view(-1, self.config.vocab_size)
            shift_labels = shift_labels.view(-1)
            # Enable model parallelism
            shift_labels = shift_labels.to(shift_logits.device)
            loss = loss_fct(shift_logits, shift_labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def prepare_inputs_for_generation(
        self,
        input_ids: torch.LongTensor,
        past_key_values: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        **kwargs
    ):
        if past_key_values:
            # With a cache, only the newest token needs to be fed forward.
            input_ids = input_ids[:, -1:]

        # if `inputs_embeds` are passed, we only want to use them in the 1st generation step
        if inputs_embeds is not None and past_key_values is None:
            model_inputs = {"inputs_embeds": inputs_embeds}
        else:
            model_inputs = {"input_ids": input_ids}

        model_inputs.update(
            {
                "past_key_values": past_key_values,
                "use_cache": kwargs.get("use_cache"),
                "attention_mask": attention_mask
            }
        )
        return model_inputs

    @staticmethod
    def _reorder_cache(past_key_values, beam_idx):
        return tuple(
            tuple(past_state.index_select(0, beam_idx) for past_state in layer_past)
            for layer_past in past_key_values
        )

    def quantize(self, bits: int):
        try:
            from .quantizer import QLinear
        except ImportError:
            raise ImportError(
                "Needs QLinear to run quantize."
            )

        # Replace every linear projection with a QLinear wrapper of the requested bit width.
        for layer in self.model.layers:
            layer.self_attn.W_pack = QLinear(
                bits=bits,
                weight=layer.self_attn.W_pack.weight,
                bias=None,
            )
            layer.self_attn.o_proj = QLinear(
                bits=bits,
                weight=layer.self_attn.o_proj.weight,
                bias=None,
            )
            layer.mlp.gate_proj = QLinear(
                bits=bits,
                weight=layer.mlp.gate_proj.weight,
                bias=None,
            )
            layer.mlp.down_proj = QLinear(
                bits=bits,
                weight=layer.mlp.down_proj.weight,
                bias=None,
            )
            layer.mlp.up_proj = QLinear(
                bits=bits,
                weight=layer.mlp.up_proj.weight,
                bias=None,
            )
        return self

    @torch.no_grad()
    def chat(self, tokenizer, messages: List[dict], stream=False,
             generation_config: Optional[GenerationConfig]=None):
        generation_config = generation_config or self.generation_config
        input_ids = build_chat_input(self, tokenizer, messages, generation_config.max_new_tokens)
        if stream:
            streamer = TextIterStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
            Thread(target=self.generate, kwargs=dict(
                inputs=input_ids, streamer=streamer,
                generation_config=generation_config,
            )).start()
            return streamer
        else:
            outputs = self.generate(input_ids, generation_config=generation_config)
            response = tokenizer.decode(outputs[0][len(input_ids[0]):], skip_special_tokens=True)
            return response
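As a side note on the ALiBi attention above, here is a quick standalone check of the slope schedule computed by `_get_interleave` (a sketch; `get_interleave_power_of_2` re-implements the inner helper):

```python
import math

def get_interleave_power_of_2(n):
    # Mirror of _get_interleave_power_of_2 in modeling_baichuan.py.
    start = 2 ** (-2 ** -(math.log2(n) - 3))
    return [start * start ** i for i in range(n)]

# For 8 heads the slopes are 2^-1, 2^-2, ..., 2^-8 (a geometric schedule).
assert get_interleave_power_of_2(8) == [2 ** -(i + 1) for i in range(8)]
```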
pytorch_model-00001-of-00003.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c2af9046b25d9384af4b4a3017cfd9df851ec3dc82225ce0f94b55f94671d4f9
size 9972279780
pytorch_model-00002-of-00003.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cea4af5d338f4803b369e0eb84598081ebfcf2556d8b739788d623a758cfce54
size 9947419824
pytorch_model-00003-of-00003.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fd8e270f939069066b659eda1d2e19b94a43d4ef10809b9e98ec2a678aeed8c2
size 6610199329
pytorch_model.bin.index.json
ADDED
@@ -0,0 +1,290 @@
{
  "metadata": {
    "total_size": 26529802240
  },
  "weight_map": {
    "lm_head.weight": "pytorch_model-00003-of-00003.bin",
    "model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.0.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.1.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.10.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.11.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.12.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.13.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.14.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.15.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.16.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.17.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.18.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.19.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.2.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
    "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.20.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.21.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.22.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.23.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.24.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.25.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.26.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.27.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.28.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
162 |
+
"model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
163 |
+
"model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
|
164 |
+
"model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
165 |
+
"model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
|
166 |
+
"model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
|
167 |
+
"model.layers.29.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
|
168 |
+
"model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
169 |
+
"model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
170 |
+
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
171 |
+
"model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
172 |
+
"model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
173 |
+
"model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
174 |
+
"model.layers.3.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
|
175 |
+
"model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
176 |
+
"model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
177 |
+
"model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
178 |
+
"model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
|
179 |
+
"model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
180 |
+
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
181 |
+
"model.layers.30.self_attn.W_pack.weight": "pytorch_model-00002-of-00003.bin",
|
182 |
+
"model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
|
183 |
+
"model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
184 |
+
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
185 |
+
"model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
186 |
+
"model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
187 |
+
"model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
188 |
+
"model.layers.31.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
189 |
+
"model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
190 |
+
"model.layers.32.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
191 |
+
"model.layers.32.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
192 |
+
"model.layers.32.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
193 |
+
"model.layers.32.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
194 |
+
"model.layers.32.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
195 |
+
"model.layers.32.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
196 |
+
"model.layers.32.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
197 |
+
"model.layers.33.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
198 |
+
"model.layers.33.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
199 |
+
"model.layers.33.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
200 |
+
"model.layers.33.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
201 |
+
"model.layers.33.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
202 |
+
"model.layers.33.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
203 |
+
"model.layers.33.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
204 |
+
"model.layers.34.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
205 |
+
"model.layers.34.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
206 |
+
"model.layers.34.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
207 |
+
"model.layers.34.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
208 |
+
"model.layers.34.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
209 |
+
"model.layers.34.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
210 |
+
"model.layers.34.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
211 |
+
"model.layers.35.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
212 |
+
"model.layers.35.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
213 |
+
"model.layers.35.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
214 |
+
"model.layers.35.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
215 |
+
"model.layers.35.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
216 |
+
"model.layers.35.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
217 |
+
"model.layers.35.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
218 |
+
"model.layers.36.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
219 |
+
"model.layers.36.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
220 |
+
"model.layers.36.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
221 |
+
"model.layers.36.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
222 |
+
"model.layers.36.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
223 |
+
"model.layers.36.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
224 |
+
"model.layers.36.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
225 |
+
"model.layers.37.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
226 |
+
"model.layers.37.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
227 |
+
"model.layers.37.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
228 |
+
"model.layers.37.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
229 |
+
"model.layers.37.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
230 |
+
"model.layers.37.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
231 |
+
"model.layers.37.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
232 |
+
"model.layers.38.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
233 |
+
"model.layers.38.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
234 |
+
"model.layers.38.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
235 |
+
"model.layers.38.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
236 |
+
"model.layers.38.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
237 |
+
"model.layers.38.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
238 |
+
"model.layers.38.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
239 |
+
"model.layers.39.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
240 |
+
"model.layers.39.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
|
241 |
+
"model.layers.39.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
|
242 |
+
"model.layers.39.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
|
243 |
+
"model.layers.39.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
|
244 |
+
"model.layers.39.self_attn.W_pack.weight": "pytorch_model-00003-of-00003.bin",
|
245 |
+
"model.layers.39.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
|
246 |
+
"model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
247 |
+
"model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
248 |
+
"model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
249 |
+
"model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
250 |
+
"model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
251 |
+
"model.layers.4.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
|
252 |
+
"model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
253 |
+
"model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
254 |
+
"model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
255 |
+
"model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
256 |
+
"model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
257 |
+
"model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
258 |
+
"model.layers.5.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
|
259 |
+
"model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
260 |
+
"model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
261 |
+
"model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
262 |
+
"model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
263 |
+
"model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
264 |
+
"model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
265 |
+
"model.layers.6.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
|
266 |
+
"model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
267 |
+
"model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
268 |
+
"model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
269 |
+
"model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
270 |
+
"model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
271 |
+
"model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
272 |
+
"model.layers.7.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
|
273 |
+
"model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
274 |
+
"model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
275 |
+
"model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
276 |
+
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
277 |
+
"model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
278 |
+
"model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
279 |
+
"model.layers.8.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
|
280 |
+
"model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
281 |
+
"model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
282 |
+
"model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
|
283 |
+
"model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
|
284 |
+
"model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
|
285 |
+
"model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
|
286 |
+
"model.layers.9.self_attn.W_pack.weight": "pytorch_model-00001-of-00003.bin",
|
287 |
+
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
|
288 |
+
"model.norm.weight": "pytorch_model-00003-of-00003.bin"
|
289 |
+
}
|
290 |
+
}
|
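The weight_map above tells transformers which of the three shards holds each parameter; note that layer 30 straddles shards 2 and 3. A minimal sketch (assuming the index file sits in the working directory) of looking a parameter up by hand:

import json

# Load the shard index and look up which .bin file holds a given weight.
with open("pytorch_model.bin.index.json") as f:
    weight_map = json.load(f)["weight_map"]

print(weight_map["model.layers.30.self_attn.W_pack.weight"])  # pytorch_model-00002-of-00003.bin
print(weight_map["model.layers.30.mlp.up_proj.weight"])       # pytorch_model-00003-of-00003.bin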
quantizer.py
ADDED
@@ -0,0 +1,123 @@
# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.

import torch
from typing import List
import bz2
import base64
import ctypes
from transformers.utils import logging

logger = logging.get_logger(__name__)

# The compiled CUDA conversion kernels ship inline as a bz2-compressed, base64-encoded blob:
quantization_code = "QlpoOTFBWSZTWX/mUzwAK6f///////////////////////////////7f////////////4C5duvi2D0Oj1ppVCJ2zQFYbnbsxmq20pAC7kEDb3Z3nWrextY9NZbavON7nveSRqszudmzAGGgkeh0Pewk881e3Tz13kW9YO7uA9AUUiAWLNW2HHWCE005Mdz3jHs1Ic7QNCQBNGgmE000DRNoGjUYmA0mEmJjIaI9JtT0JoaaMTaQ0aMjTTI1TzKMmETwyaJ6k8p4Ke1T0wk2aE0anpPSHppqNM1HqYzVGj0MpsTTUGpoCAAEyAAAmhpPSYowMk9U8mqb0mJtU8ETwCZT1DQ9R5R6htE9TTyRptQeoyHqA0B6g9T1AD1HpGQGgD1A0NPUAAAA0A1Mg00gmhKPU9E2SekHoJ5QHlNDEPUeoDEaBkAHqBoABoNABoAaGgBoAAAAAAA0AAAAAAAAEmoiIgmiD0maRip+qfpR+k9U/QKaZPUepiGeST1HqeU9TQ9JoANAMhoZPU0AAYnqaBoAANABoAAAADQGgAAADTQ0IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAASJEE0AJo0GkxGJoZNKeBoTCnpNNpU9knqn+ppmUnom1PKZqTaaTTwTTFPNJ6pj1BG0eoaMgwQGkYAGk2gjT0jBqaY0RoDeqZoNEYT1NpsA/+iBrt+OVIiCKqfH7N/e67XZ2Dx9tPHyWbW4gAENNTtyzk+/WdoU604SoXU0JgfqgQxVmzbfdmaFcVxQAYINDyjTKU1FCUUzUuqqptg4SBgwIAHYE4NwQOrbY1bOF26LUVuxYr3Hp4paZXaqKU1UmXO3K+IXn2hURrgAegAaTANS+QBclUN6tpvhn85+uTPCLxzj34YO8MIMg45eRAEy9IYbKxeZTRnTy6GpPLtVGWKKK6iuDLa9wjtSmUQREX6wHfE3JeTVZdoj4Hg/3cHlBdw4c4BdGvigzZsubPr3eTi2hs6tZz3J9zUVm8qH+FPwSx4Tdr6by/OA88iLHk34rWNt7fT7NwqqqqqqqrGMYxjFcdqvY2mXyh42c2ccxhtyvBHojjUlyAKRgbvAB6nhls1wGLTOrfGMBsqRXl9Bl3sOlvafSA7sDrmAQI+mw90af+bvJ8mwjP+RKtjobGNzbfl76iTHMiIIUf9oIoygqSG2NLn0Ys/mZ+hzufu7epmzbvP1t7S0Xo8TKK7q6G5MA8vTgBb7Bf/2kITSLsH7Xmfydz7ahAt4YJbBuAQJI+1M8DLJCQH+UPbv212QWIhcCKhBrR2eryfQYIiIhKE0WtbOQ7OwM7OxtURGbF28NBndi9ejVDVA3dne37uDdzrwINS+O/0AzQTCgUjfCAwkkKFMT4Kr0aV3DicVAelGBesGYoCRcLKq5iBFR6SzOzrAwFWDFVYU2XT1oFaRJk2JBDOwVk1LFZZfwY7tQBYMGdECFA1cLZAg0IlfCTCMgZ4afRQBNvXSuMORVUTxTLSTgMFoUtaGLIr524yIM+INSFFIOHQ4TG5NZbd3Su3Nu9raSLd/ueibSYpAL0D42ZkAtD0pnXrfTxYPBw+mAt1cKPCPmDNMCDYCBiQwmANVhdDjBwsdIKyfH1slCvWbJC4QO8SBxi6A+GEpDBN6UQnPaEvBqFk3TwChKSowEENpyAueDIFs6OxxLRmFSUFpjWgYpECgDgfVBJjhg4GGcI9CD0S3igCrdziS3ZoYHlQE+7AELdvbebTVsdRvrPHCgiAbSYzUN0z0SCshLjaUaREEREQQRHNKAgAS9o0kukdJx0ulaJk0kINzlUYN0wWXLLsmRgSG1BEJNh5sCuVtIybGlKUW29BziJUTpqcA8UCCLtOGU0hH17BYTERfPKhCAwxJqSSSMd+umawlsykXZiKHesslqlVDKEHPzFhIWwJHTfcYCGE9dQK9sKixjNifLkW1iLnyZo57BBx2jksXPYjcaA6Z6rlYTl9ocZHn2URKVXnY/Wsrc5l3aym6Uq7u9eu2szSbJgwhqPqfOR1JCCZl7/AehLVBSIXc9npUk8IDzrRCS9XKMeamSDmFxK6OQDhwNnxubbnQygQb4DEL6oD5qkkG6F03dyDAUJB/awNUoDCa3CmYy2QIsK0Z46BoX1N4kY8aGNFB8WZAfWvaHeUT4gYIjEsZBBARIFAk2jCTxAmpW03GtdW4WCN0bLJiiqY3ixmHAWRqqQKqgS2hlf8mwszkhUy3LDx3GLdo5AHGAgC4BogUAVgH4QM0AGAImwbS6gwANIep0rJIU3hBgaeKAEcnzfs+g/sJZnETvInDcAH5fE7azmr8EyIFx77caxbrDBC64CEU8wCqzAHPgkk4kiPREKYHn2HaoDBWCCrFBrhR+XpeNQkdbzCBHee2hW8EW373k/qd/PxGC2R+IO4vmNEAl1AE0l4bEvmnfd5/JYs5gl9XpgQIS7g/LAK7owBwgso9j0yEB9MRIBjqmkLdG5uED3tICA6PYXe4WItRawAenfJ0lCFupoGvajxuQC/5YQPnwFpgQBMNgBndpgVNJcyw+5vCJgHtWU0EDYk2HsvD8Qkg6ANAd8UQXGH/3X3gXgNDefHyaQ/wd93Xx87hWWtW0kPCQGR+KYiPeMQse27PdNLGwhlz8WJObSnEQyHJw1JmStJXTtIg0ZKEHrLZCXd1ljLGkkxtpsDofXUiBH0LLEM43kb2waJ26KZsJ9sBbxcAqzUgWxzogNFm4vSxjMR58r5Xm8H2+6ItGcNX2AK3GhDIMzSX3YyFsbNG0u0MxvZzGFv19k2E45tXrK+1OKUYRiH2OT2Fs7kqtxMDrANVp2nxreAZg02UaFEsuf6+urQi1PxvNOhuacrStndOnonV3e5Du+Xjp8mjhiHYPNexu7UKSbt0Gs2rPIVVVSFyQ7phtQ0ZOUySoyZA79muzuLBZaLAW20gZIeuJDacErguFE3e70svo0S0mRBMBu33rjqVrNEN9A5PHvOgukEPEgb0tYAMrvcvIXB5ydzJHXQ1n+t7BUI24oJtSCTAUet75rBpXL4ylQ4LGBpbQeQCiOku+8rq90o18ga4WEGBDhvHB0YYd/CDLIMdDh2cO/i/RppcEi3Zd+CCU8OdxAAiOgi5qeghJkUnO6YGZi5LEilo2WhSiEVsU2IK7unV2rXG61Q/LbUqGx72rn2Uzx/q/fzsCWUFCQyAA+XqfGVGvL1kml0MVpjJl1A9vYoYTSatnV1+z2czsdoc4QFWLILHn1S71/r3V1S/fJMgDlXX6DVv8+FeECNi1u8zf8K8r1Khq7twFu5xPfZJT+PLpYUZWgGNDG0Jlq4rsQy86u95xqTdO0TbSGBdDOUSyyGHQAmP5mgNfVvgeY2tPzlKbyrvnaZhgQ7aWeJjzbF4mjPlro1hYjmnWUshKxVsQ6pveK850taANOgIE/aJvr0IAC0g2H2d1agVwnBkAF1kl7IPZc8mBthvlYish4AqABgI9hw2cExRabO+8Xz31+enwlCxSbnfVFlqig3UKGBQi
ybpEBGQLIxuoUMVYLTt53sY+lPlxSAq9f3lfnVlFmiBFrOhAeAF/0/N6HI6/+rsQ2+D5U5fenadDmtFFgeZLLESwOgWWIlgWFo+uFROhke3lKQ4bf0mLH3XSOgtDGd73hfMwDM2aF7Lonl7AlbiPbV2zY2lvu1Vj7jzlmFYoKieH93wt3fLhBXgYUGJEjga5YWEVyE00qIYWXSKd0ZaZy+vuCQlhaz5ELs9n/pjuFAHpoDCMEEtseECQF+Rk58EyW3nzCdlyCeY5WPItdkDZ4egXmjfZTLSVT29ku6KCGxHbdTBD3z52SxkuXkpoaHyy3t25+JwX5zFdYawDASl7397IB2tunNbt2FygaTBIO5qrG0asQmxEVRGCn26UX6DewTmic/QqkLZjdCTqjQDGlxy4IODucyQlmE0zkwSkR02cZjZcA1MzMczZAf1hfPnZT1IGtWIJGOcpzgYwCGyiNtoxRkupRElCCAgWJcE4igRJEQogPHYVAVBAEYDBkUEBIOSMK3KJNwQllpqWZARLCgMM8TkQoHOSZTDbSrjS6QtkYsQSloWSmQ4BlMjEJuuWh0ERMIVRLbcNDDQalLRQiEoBIUKZaiQpZQ1KoooVlNtjVVGAsG6WkNS84MJcoYIgjBrKaODOaUZG6QUZlCUGKy25MUVYGMWC+95zG4FRE0iyDRISulc0GQJt6m5u8WSQD4NAiDAMD9y0Q4TBGAaAIGe6PfdX9zl9Xginufp+HmPiAGfY8ZoDAarMoQAD9kA2OUJQV3lBq86RzpT8nbXPtqxsvN4YTDyOQgGEarV4Tc5h1yv2Npz+65PJpxO/Tefe5S5U1n8asAC3AQIACrUA5XacxgALbHvUfi9ApR956Do3PCWymCzTo7JjufU9DsGcQWqAFwwZfDzR+m6436pzvncYkARkLKOxX23RuLsQeK067Y/Fq8tB7igBMvb836/03fkV4qZ5YY4pFxADLifQb2iaUAwjesDs8Nhx5vnIw3rZOyb9+jyaYazgr2vbSKuf82URMcyf+99L2sWJHqW/I0PfaMR0KsULcnf9Lx/fJFzattuUwcjv8vdJed+FY1s49FrvJMbRVa82imzbdgSpDhEtleDphWrjgzVu59jsXKG/3f88zolkjqRQUk+Xm8F72190OzfqwfT5XAYbvq8WBzq/B+4rLP8j5PDfiytkicVOAAJ6QOe+hWqqwgfq61qtJ7jrsz89u1dDqsK/9Wur9Po5K1vHsXseRHoyF+LoewZ3uHaanw5S9LCW9Gj8k3e5ObY3NfjabO0cbzotaAPB3XIg+av5zaHst8ijMqapTpVtdwy211QZINMi1UCIHnAB3ZLFDZQuraVlNALggow5ygAhEo9EDHUCSm8+Hhev7eTufm8onZ7pATIUwBEBBUUEPBw/zcrl+pwtDJe2XApoPk8CJjTqtqbv7DYwZWFs/M8EhDcYE8AK8A+GfX/aQkYgSLdftV0Id/5gf3lOuNNC0799E3uYYtpMg6yABaJz5en+HpUfveNBXeYA8Whj8TtZK60F8V863ndv3PwKagCzpXtfv1APjaUgxkGLtptiZPR9vldS2Bfy0pT3RXWJlLCCj+GpAz28S4v0YQrYE7We9WpbVXz7KVTWEtoXM/UPZhYnpzdeokWJdNHQ6JQLxp7bOfci50rBcdOdhOqmyeC7B2rL6rxd969Xxc9L4zMrsqZ0+DoaPeSn8Y5QMLTOLpdvz1qaOO5xT1xPjgKnhTYa5pzi5U+bDcHXzYdxpgAbbhf/e8aBprxka5aM2J3lYXBG5G/r7CunzcPyjz2o79z8eDKkMvdO9WixswXLu3TkpoYcV0465fwUxoxC6L9Zwc+QsLDfqipk3wMSSRkBPM8Bxrwt0Mjr4IWW9Tw+Kw23yTbUyYJqrgNaq7saBKAdzYXMQ6mkrfqt72Lk0YwiZmIKkXUgChISCZMMrwdnjWbJDoR5ZXGxxAX5uRBfHBOk6JS8VVVWd56zxf8v3uR0/zON57e6BDuqIcQDJ7H0q5BNPaWbExYw2Bj4tRM9kB+JfynyyEfR/7ZiPXRFLmwpGGjLF9G6/J65mkUZEaKrUdBZYUxFKqGJL4LAbEfZjLi4GYXhv+x3ZpHkC3YADdMsKeYmfKgtzUd+Y7dVngbdcEFGAL3VqaYfYAYMtY3YKIQumTVXUFTFQyU0bqIeMgV2WOcZFXICpoMvueYVy0mHAiaeyNg1p5/QmSbYgyb7WQdUPfY3QeKc0hewGB2z2vH9t+pvy7B6P21pG+wXCMQHZl30TJonLPhQg8nka+raw1OLPUVWvIidrloKjcLH6/YAwepAoWEykQ9Bw2+YU/N5dbXnsNcPbubOszstYSwQYATYulLN0AHAgwb5t+VfATV6uhICgRgDGUaoVNNLc9ZMMW5+qKVhOyoRMLzJolo17ACLDPes+aoyeD5aIZm46HHKV7KqGX1IGbYEEDaAh0Vj+43wIMep+e+gsP4UEgVjmMAWTPz2XZhQDA6/Vzbk0fK+v0+bNB12LRbfmsufKzRgw7Hp7b+J+N2LqWXdwWTvhQ2rIPjc2cgS2A4Ub7IflPitJFAPyFvbvHK+tXi0Zcbi6mO6HTaIydOeYDmSYUIACAZwJCEgueoJnU7W6WfGdWtl1TdD4WHQ8AgDnmNUD+2YrjxNum3+1R9B+XSiSGrVLcFrVC/Z9R7D8DslIGyMPXbJAFthAMNYs7OdlqPilZtnwtReItC2Ff5vD8mQHwayX/vh1LB+HwoefoZ6LWUKb7WH6D0FmEhEKgwAayAYsoKUCcPepjDQYfA2TMWHoiS1lspYmEi2HdFULic/ucQlrFCCwPxyDeITAUsiAUFggCtZuDuVPLvVtM4WCG6DlrLwBL1JAaQFWuf7/uHZ1WAHEBuz9BMrshS8OhZpwrmYpgUIFoauEJQxtrw2iu9bT1ZLik/F26jhZblz7739qomvexIWc5hKq/GfFAebrnq/23mGuisbZhiROtNdFBDwqCBc2zrTYMfhMPwIF0s37CzzvYKeLjIfQZ3D2N6o+FRgDOkDGFGjCDiy9cJBVMOBWJ1AjDIxTAz/LwSRYuyzhHyDiECf0P53hWshYcMslf0PC0tWfLlUztN1xTxhwgkAudx+IE+NuS3phgEhRBo5lXEG6KhGydUzSU2WphfuFy0VkjH2AIPddbJ679s70tkL1rBEEEEmFgwK5pRCB6ZC5EX7ZCkCTI1pQUDJAwhQoosjBZFAjelFmydnwH9j46Ei5DD9ZaOvgT54UpSh4mD7FR2rjbJjFFdyOauUAjNr/DYBQJkLsUsd2mAXDIMHOuu8ULJhkx21G0UL7fnlqIPfiwdblRpcEaxVjru+6bHpdvj38qAOr1rUACbHrKGDWLFjGCBGYoGREGZBh4aGauRARRTmJdfJBWYoCDdFrBtCgYo6H8NyRIvFfbeTFjxF9riIiIiJABkRljjGMYx1mizcSoJ9AAFqKHXgBBgYnYjs06fFb2fl/bceQ8TeN4h1jrKPd/Pbtl3dl3fnbu7u7u7u7u7u7u7u7u79ZxeoA2gbgjyqd70
779v47Lsepzo6y18vJkhQMaDKDNhYbWPpJA6hsD3pzguE4gtOhzrtDoDA3oMbPVBY/3fi0DbkWt7GQwMw2BtpNpeKt+v6KytGxxqCQ8JoLCGKIALFxqwIOeI7fqckjnW8eHjcW3xehEp2SWhvmrtDDdoBSOn6jSjQCgLuhd+EBOwr3q9GbUewJDA4QvH+DpFwt+JbtP30yJTy10KFMLT8MmAGUKkqn3DQHSmTACxjEheIpDhGuZT/WrsHgP+ly7Bsto8UYb2bBvwPRV1O/WaEbmIEMEbQtfphLgUDADF7nayfXs1CXBxYOi1aG36B7rr5EX31tzoym2bTIWw0maxvM3Gs+KAOSMztimS4oGQokBRf5dGKNykDp8tH9chWc9k7/6I+SxG5cZSnx52CFhoDqaZ8wBethxjRVKaRfCZTeBpi6ZNdZFjROy9x6tdgMem0rtuH6wbAz9tKvlhJ0JUP1e+2xVgroJFw8tQxLPdwVnLVMDu+mmfk9b5mK3qMNwiMyBqFaajMIgCDBYUXbdKwwVVhoMXL5YLkI5FFviIkYQTNamuapRILAqCSAYSsIOOVAtAUUrDwBSthRBgyVAM1wBrIQhhTlJKQIwFnj+b+aXuJyerhwx7HxQLofddtH71c6UuefecFIrANhfgkaIt5KL4iV43tMeP17BD8D7Dl8+AQTGQfz/rp3JWOfDodJOcvDAquYl1QQiHknUmAQ3lYpRUtJEUowXnnJnOZjZzdINlj+y7lXBb2uPR6a2E5AC3S6dBaJxYl1qyRXwQ15QflVkAK8AmAwql/n4frTztb/XRXV9J3eXRfv0MuB1OShRrtbrfdudwKxsAYC+QHiNISbAQu46ffUU/Flrw68uJ5L+7p69JjfglHs5PSd0bjADZeFsIWCqy0kQ20m3CskYLPShb0aoDdHoJBUQVEirAUgeRTtUBwAa0INXTIBPMHp9AongtXzSfuWCFQfDtzRuYRVG3WIXUjEg7b2vBZKT4ESq2tTcMyGXlqZN+uJ3CaGHEJB/3Q6/xrGIGIxyzCG5tLlSXx61sy0Bra4IFaYrjF1zJj5JPK/SslbN65uYffnqtyIX9zren+rrSsXVVhq8VZ6DFpnBVlD48AoMeltsyGSZSpdUjR6bM9J+oHRVmhpp2HBv+N4PXeS76ctP4LOLvreBzzyCr2v1K7eBo+dr2gwZ2x9k6EpHd7pNRl6Pv+IgXtj4WmtlEUQxkzWOVcT6jcLrhax5PVvgurz9q7DtdWriVdnpnTlTrQqdvWN6ZNr4OdpMM/T5Gg8irLXS/YOgvhteS49VEj8+IfNiPOf8MfMkUw+lYehdNxKZnNbjIoJiqRY1KVGIOWpRtq4m6GCyiypZKKzWBQq5j8RYJE0NCiyjJmgUmDBi8BoJgMVJYXMF4aGDL2XQ4HDKaRGaGhctNBrShK0bSU1BpFoRaTkkCCUWaDCx1MUXQCaGRhgoqhCHmzrFyZwUFG27KVdmNgbChCbZNAMghZRoXKM0CMEXaUTZswtBpLoCkxONrpa2wL0qn0mw2eV0yXs1MGgGSTcAo/GELIbpoe+8gKSqpV0ZIoIa4UCcM2EdVikuAPuDlU89YsXrb9Zb+Pr/F8NexBBbEwTQs9HmsQGBYPoK6bZKDvj9yyALrlOaMbLpKxRM+njvB4id/1Y1WPm3K2A0BVSlgWJNjYxne6JZ8mZfv7w1Nm3/GFOiwonktduZaRH2loGGhNBUlQiHENkybM8pBim0iaXcpE8dAF4GodlriMfOGH6hHY20huVvSlLDBRKHQ4Y3SyKrmCcy7ZZMDyNqVWWwpS+RHQaYnmEURGCKmQc8ARghpQffVMwK2vz6V97O+59X5foz4jUfN33Z49cKeKObXDE1rNvV2QaDOLOi+R0fl+RM8jVQ7QgNiDMzMgUCLlYO71Vn7X7vF0UcSZX1pu+s+xC4MZXNQCl0/rb68aAY3rOJ/jaw7EOYIIlln6V+oFpwZLOUjUVHfe6pdjXgAqsD219Ri16edZ03hcjePW71C29Wy0nTw5YIfs/Y9sNovb+v8vA1P7beB5bQmvEv59b+BnUs8yqQ5/cLKV0EZRMOGHmpsMrPidWDXTyP3fuO+w/9+kbujeEbdg+n4WXJQBn1kL3Py/M1JnkOu70oufaRPG6bsd6SUhq1TALBZAhKpoyMIvkQGRAzJD+udGR9e+WlVzjlJeqELl+D2smL4vG6BUFpiKHDwqftFBbX+9VV338vNg+5kL11bd1yrZaYZrGW36mrUIRi/MVgrNNITCj++zpFSOrRLE+Prlr3mYOP1TtXvtpOwLP5Kmt+3zZvXSsOXW+ix6mXS5mb1MnTvW0u8yHF356RuzXUyeGiLTe+IvXvKmJrEymIxQT9QMSU8WTHgnJi1BgP/WoqICgO21v9Hiw8IaXJY1619oEj/3cb/7R/nddLm6VA5xoN0t3XY6Hiep4VGnzs/Od0hj8f39YuAC5HvfwvWuOeV5fz820AAGglyrLFDjUrv//M/fwNdsEvj0MrTXrV8vLZfMvKMAzJ0/Sda/28/N0QniGmKhoagYUYMGp8IFDrOoi40L48r/SLxfSSDw9TM4P4vUeHE+iTmchyj7Vmwp7m7dejVSNZx+2Is5jzuf+HmHr2aml3fWein0wnXnxne72A86Cc3hrzXgbfc7lNQiJuGMljn2Y8pgXjrTczIy1teeafy8Tz8vmzBWAAFXfojX/x4Kv/YFNprgURbUBytnsI9/0WeuKmZjrWcumUGQgRDIEUsAwZkQMwPsGTJjpTEw7YAwCs7Oxn2XE+hexXn+z/L7HC65bJhCR3SxMdHngfkGgqJnhYzTGjw9StB6E4VI6SgkdNEdesLFW0cgxeYq7YABEPlMspZSBtZDQYZMvK9Cbu/UzXvja7MLlO4BfVYkMH5dwAfQ3u9WEkCoveLyp86iGmleemxREJQ0NoFyWpMxsNQCuuLGCdP703Uv1a3JeT7vfpxp8J+o/ft+J70dz7dV+1QEcxyT6REE6vsl2+0Yd8ayjKWBg2j8pRTeGhVxiYZDc6/YatrSzsw56wbWzGkp3FLpa8+60pan1LSvb+rcfyjTyEM7yC5BVyZL4r0qVCMZRc+AMHxlyZMP5QQiFATNqpVSdy8i66S7oSIl4APKPMzOTus/KeI8rrY6qBkuRSWT0y7LGvNz4KBjigkR4r0v9/bluxFmxePnvZRhpjgezOiX6bPa5LZkzsaLjmf6NzPP1ZfH9p7j4MsQL0YMETXjeb/5lAYcJWU1RECXppb+33HdO5Etl4xLXPxfV8cGZ43FFYXKVoMFQHssoAIzyiClcZR8W8vqiACqmcw8DAwzLM+FeLFaAYRiJ1DFqKh2Fcs+6Zd6erYKNpF09oZhCZNX4DO1OL94JPGTBXIPMmPjmDb0GlmwFaWG2CUqSjhc20YNd6Wwzu52BklGYvDcMnERi4Yh1wqwcOlqiLatNe4rj8FcXDxqMSsgYP5/FnSoTq2VVKttXQ3Gxq0q0Shp+qCbIAeWxu1Ynpd88H5zJfn/V+v+5/N7nyR7Q+n02bmML7aF1S
g+a32Ud2eQx2a8dQqTABf2SKJgvKADJgAJV8Rd0Wt1oIVj9nr/ZfC7fkbdqnS9R4eIbqH2HVNjOYdggfFeSAHKIkaC5R2rzEzdxs7dDCzizsiB7OluhJplyBBWKXPmS0tsUNnNs2D8zfW/QTSAr0EcsnQ/YPZBD4D0rHa3rkC2DHq+G97XfliTeY63fQow3RQpyKsCFgdUC2sF7aep4TmSDjlnDDpfIUJ3Ne7AMT4D7xpuM+j1hXBxYcyIpO3bvLubMhwY3Lrr6KfLP4PF0tpDjMOew5rBbSSUJPAfRMkDCSBum/B7S97oYaYZS56rtu79Vh408mfXcm6HcL0Qe7fRiqav0GhPcuxMpZIm/WHpICgBUirY8aK56MaW53+L/x+BbXNrjaySqntSLsoHFEiExu5hX7+yaqu7Ss2LrWVpPp9L8fuVDJdVcPqIQRFv/gWlUadkCUYMxFQf26Nlq3czS1/zwLAGILGRazcevp3q9/0O/YUWwXKvQTQghgHliLIIbcY0XxVr/9oV2++gsQ57NkRK084MjYapPJJ6Gd7WONsJRq6iIJo0GH/kO9e74wvERAiMW7UqLI+2obG59Xcazzvdk2UIhBDN4V/KqrwHJ9EpMftxjsugftMee96M9+G1DfnomWt7OmvNC5TP5/Fa50GNfJjieHFJ0mwlIothDYzg3BQyahykpudGZEmgiK9ViiKhI9ypBUuKuau8PitJWe1r0kVIrV4VRDTDa74vSvBytKDcNCzJ66Oq5G+hTTGgbpBMS6pJTOmrIjb0m9HsPvrI3rQhSkRYc1aEmn4+CFS9MpIxTpLccqtp+dpwTDqQfFDvleEeOfwGuSJEiR4QBtGkWjWrKysrJEiRI3Pd252xBk1NTBRRRZZZZZZZZZe4EJvbjqWGaaZgEypipYBc9da7d615Ozv+0TPBMoiPZt+OB7H2evtWBqyXzg9jgyNarCYQHxeABDu8KyT59xFO4fpXed3nMVTnQhwffnGz0DpW+c5RkbdjYgCQgDV6Sk3OZyVhq5u3M66CH4jQq6byDLwIv8D7ipARoPE7/rm7y2+93QALi1QT9F/QCxMDOQkHeUdC+o3NN9GXve/W1Ua/wcVgmxFD1YTuKB+xQIiSdMyXLjSbjWwNfsJH8DqADRWZHIyjHLolbAN4CAMrT3YQqcfwcVf9TtpcgPfzwWRN7XWJzrS1KzOVWXccRQ+9TusY64JEtzfyHJnKixBwcbgCBAgQiIiIiiqp3Pje3Y4/hFGgiIiqrTGMYxtsZSR3dlixYyrLVZTH79fh8yNTc4ezofRU9vjHOIATEYEQNb4IG7bzkD59jIzRNInn9c62cuu1ZkYpfHu7uokt8nd1Hc6ApKjEt2qqbEG2l6oUPERCkrFLjmUay3EPnj2vUe43MqIYdrm3PZT7WrLfnw7y9is1SEtuI3OsO3EW80l8imWVq1Yje2a7qnbRVNK7eZSUzwnE6j9CLm24oqbZ35UTokBKroRjwJNyCBEACLMRjnOy84O5zJREd0g8Xa+y0W7O3tcCI+46EvAjDUyqYnOCQAfEhYjlWVo9HFVl0Fk1g6rWywYXLyW9gmyJHKcFdans6g078Q9ryUjaXacP7/PvwauCguS3VK61FsSTIa5RZd+GJqurSiskfDyz7d0Bd7WxYHfJfTrpTamo87sRYMCEdyYaUdCzhu3027ABTtQCAnwKi9q3KK/rIpk6zEjGHEvADnOwuJ1nOvPr8XZNswFPZ07G/LauwBMG1tOWNT76s7Jw1OxxW1BImaJT6XUIQ/1VPRP6UZLBjAVwit2h7xS6TLbCUnzPvqOrOfrbFh/ZAFnP7jW/zIMkMNMUk5C20iKshen2HLTcv3ge8jBXRbUso7c88qlYXXozqDXWcHg21XXWzupu9YmNN2aY8W/tJ3ru1cs4YtK5b/YBitp4WYoOvZCpCIC0Ju2+xw3MABgLVFBetW9KA2pqTQMLlkKFfMNANN6+JBLD7W6/i0AiMi2fIgslxtlD+bdgBbDk1FxvsbR+npU23xUVtnBjvadzYRwqwnvWSPbrgxgFM01Y2yuGIJh4HBXDlmKSUokWxg39HUAD4u4+D8ivAiXNQkqnkKxTsDkVM+u/s6rx/w/VPZ1yL9nnzJm2YZ9Wl+9izPDiRnfzWU5Eo5duybQnktKu3b+J3pVuuBmmnebBXfiZtkpUjLRKvtuhD3GDAd3t8lPpMQgVQmkICwxxqhUhLQMPWxbwjlswPn5rmN8Fi0j25H0DYQMgIsU4+OvNxfxINfZR+ndisEVJrn6M1cgs+qsqW2AYv5gIBUG2nAI2sRJdPp0pkIFsJQ9DC0Exajuxg+5pGLShRHi9wPxlNGkITynkwYgPc5Bjm1ceZiqsTuXbr2ZrcqBszMKehW3A7cYHig2nqO46ef4275H+NjUxZ7Yxj0XWdJ+CBStOyj3EqZrP6f8049HRTOibY6aHBkysu7Zy/0S6gyH3v1st5NJVth4dqmwuarDr5z62e9OpPUqH6te3WRJmOs5XNggNsBgGGgo4SSlh/wYAXsqj3aHIiODcmQbAbQltCKcIoU5klptJHQ0l2P4Tgjad8WBWp9XyPm/j3QYeU5tV+GSJ4bCaYcK2PA4Spq7rr4bGK2La8fhcB+ZpbeVZdDoKcxwCBZQgvQmADvnSmoonhrOe7esVg+7JS5aUYwMCekjlC6YlQHUxfh1evKIB8OGrutYZ4YX41h6Jq6hHuvnBsJnjhYHY81i95iJiJTU6/T7VS3gB1qH0ACm35YBe58z7ceWShP5goYAvCcHOTphatcimJSi7e8cPtVNlLBeanev47WzlgmaIlrfg8PQALIwuyc+Ce7PTEdI6IMaL62wH5dzYaANEsRgmxYif+uWKupAwqrJ4eXO3BFsHrOiYQRSnB5GwA01qir3ZWamHuBtKIrzLS3by/XYFMY2AJEnhaR7ycHZFV8q2AKplu2J5dsQ24LL0qZisABXaOzHlwBFOQv0vOYWldhDsVt5f3Y4pEAsNwPQChB5QmJB9EYeqbx1Mx3plDVGMY02NMYxjG228wkHXLQBuctwIzDl0DNb2d3Zr2eV57mni8HxuT3pPieEQB9MdPlRq2ASoAJ5D34BKD2+jwhMSM3k9e3pXf6aOC4LK2IgIYJ4xQMEhhPzy+0BRQRAMTrG+uVq2FlPAAWvayCMW6HdOctiAZvYzmADuOlcPkF5QWJAaMRsb5I0Onl1kWwDFstny1tu3cPUt/f34gagGAiIG0z+LwJMwuBjAAO0oXQ+j2OhzkkDWu/H1iOt9LZS2d9xud3NjEIOUBcEGiLbYAIhuk6kG3QiZ7Vx448qOR0823ux6gaDAo/m7VGENCDY55QyihE8PY2c3FAOq0eB5VrR2rVOD8Pk54g10gYFruoShyCA600IlGADNkNWFwSUq26fo1MfJozZb8ivAWwKtUCnsIy1VVc6gilxgZXuOpIn5NqpQ4t1rnTCc+zVGQ8dLhuE4NDF7wA+sXOKNy3yzCWV69Yg3C0AUAEgSDmXcoIVu+dFgcdgdaEhA+iWl1AC/p9ikx
5Lmxupjb3zEXwOwav5pXeGFu/i1uQdRtu2CBnIi7j7vIXJ+0+JkKDrtuikSysRrZuAkIPGGIXa2KOvhm+tzKtliPPcIGhgwSePz0mjUO5L7zzmcZMHoTM00cmhmTJXLHXXVL0wJj4s1MzRHFFiZHJnI5xbqYKxtqajjQWsuDBeCnFPf3bjFXVC0XXPfJZnZvcUOvlJ5TfVc9np7+YKcF8Pr101cACqIsDSQrhevDLMRutoELrdyRd4yc4EBhnWVGVUo4LsLWMYimrKjHNShUXacMGzWd1rteL0aqM9Wd9vU8jWwVgD0CDq0ypYdiu5V1wDsEFjDwLXJ6pe46MvOgOONLlAwPQwQmNUX+2AdnCCSJdjtaAefC8AY7bANwtVktFIQWVBQ95dSmjz8VnKFc5xsXgOQl3TQHPvghbPELlyOR3/IjaKbR4oXeqF4EjmEktr0SghMIXS60jhlBQIfEIJnyehMgiETwigxDpiHows1RgnEalhk2EzYwRLmRwajUmIaCFSzCXWStGaaJgaMaFOidK9crUyN2ZuYmDCMxbjQvOVrOaRTDXXVeCjhum+v9g5xzwDtdCQ0k+kA7IgR/IB4DE2B6gEv0Dv6l1YUCwQl4cgIQLDp7+vyQ0Ua6AogR/cA0tRku3sTszsBxdKvDwb0HSuapgWAtRzrmM+GLTWgg8og8IOyt6ZvFLTvQ6TdIU4jAZ9qJLorPPx8ToMIzve9bunjAzUZTwZAuejvlIVhEDGHZ43P+c2vnuH0s6xLjGN5IxE0xoW1w0CkEhDEzZIIIKKKJQkS+HFVRzrtPvD4ASgRgCszCJ7egCW+IZ1AZrFQIbETEL8gYz6s0SYtQwYi6Qsmdq1IQVCNcDQEDNHPNnw9vKmss525+DcQrAWHAQARzWHlAGPJFvL0qtVnM2mDSOxfDb56lUUmGI9SmNfCBxBRJtxwA+2eJCOmpSpXLFbYv8diZyMpTv2LEbyMNcTJr20IxsYzUrvRbyu5dvYHUZsRs8gfCLXUEVYi8a2a9PXF+ZtLPx0ZOLRblX8XTa0QJJSoa+VKRIKD5RCmFKYOIiBoFAUCXYIXCCWZKNExSIoiMUmCpS01EkRLAsoE0NCxCz8oQK0iCYNZrgS0sWA4zJgpKMgxYZxIN0k6OoboxHmMgmKyNy3rUrA2BW11g0yU50ArBdUNYm7rW6l+FmQDmsfUcr8Nxpt6ME1pzmPW2YuvyqQA1FEqGKaOFgPS4YwF0qjqJ96aNghQyxO4ETMPCpx6cPhE1xsRksh7qapVjAG7QQVa6blYCqhJolWKylASeNpfutZRkWEfehrAM1hps1M6VN9y+8pnOeOL3eSrvGKkr3kEDbExtsYADtYMAhLoFzWdZo6F3T89cLurlkYDQ8iWVgjINJHQatNc/BZZPPYhX7J3dX5zJTnZ1pJIV4y+k2MF25BTUhIvz2okmED6ax7KgYdJtMkMMjHiBpMVmJIippQbqyHkJreoQDGrZe8QH4qNpIBqEHFpVTrJVwkLCu5ds3+pbccosPAGFjP4J0AB15EXRr4rcAbXmibqr2600yb4dM8VbMHACFOCBZhZIxpWCMkDUZIBUQoKpooWCkAnBzOK5na/LqSSLTATYIaabQCteZkFlqs0bDPpuWAcNiRn6GWSnwrsatNVFIK0+WUGVX3p1UghXmamW9amFzoPHfP2Z3WLhW9ZEaq0DQiqOJyRC17MYwQA84eUDjyR/GOBNpNoO1pV6NwwsBZoAgBWz+M+YS5GC+Su1IEB0A5in0LwPQxXq7joeDPBdd3DzF6z96RTojxR29u8vE3GnO6jAa0MBmCuoxyYl/SDsbSpYIlMINttOUZndGWJ2JgBs8s7bw1GhnALOxFBnZayRRjt4bSvH+Ma9WNZSaKBoUDtDEQNIMt5XAZJIvEFZSahWUgL7ADIBAjZYJVAK8NHljSCRbLZdxbuCkFfrZVirL+GkBWYaJFCoglTaEWtiguhCVZNjj+c9eMUMbOVJQmcHOmKmRIKboAMkAbohUflNANgubKuhTXDGSlSKY0PetmdL+7bQoIJCVRY+osfasgH1NADQYBBoYd+dccoSIhapDyYkRkhkYGAZDWCMlJReDHnRJZKAxUYiJmPGYriVoGAkdW2QI785BQQakRBFiFEknMOMGpw8jj8a7sLaWrGrZ5gDnB2Ys6AFHfczh5BvVw8R6n1P4QHEbDeIf/i7kinChIP/Mpng="

try:
    from cpm_kernels.kernels.base import LazyKernelCModule, KernelFunction, round_up

    class Kernel:
        def __init__(self, code: bytes, function_names: List[str]):
            self.code = code
            self._function_names = function_names
            self._cmodule = LazyKernelCModule(self.code)
            for name in self._function_names:
                # expose each compiled kernel as an attribute, e.g. kernels.int4_to_fp16
                setattr(self, name, KernelFunction(self._cmodule, name))

    kernels = Kernel(
        bz2.decompress(base64.b64decode(quantization_code)),
        [
            "int4_to_fp16",
            "fp16_to_int4",
            "int8_to_fp16",
            "fp16_to_int8",
            "int4_to_bf16",
            "bf16_to_int4",
            "int8_to_bf16",
            "bf16_to_int8",
        ],
    )
except Exception as exception:
    kernels = None
    logger.warning("Failed to load kernels:" + str(exception))


def quant4(weight: torch.Tensor, scale: torch.Tensor):
    stream = torch.cuda.current_stream()
    num_row = weight.size(0)
    num_chan_fp16 = weight.size(1)
    # 4bit: eight values per int32 channel
    num_chan_int = num_chan_fp16 // 8
    qweight = torch.zeros((num_row, num_chan_int), dtype=torch.int32, device=weight.device)
    intweight = torch.clip(torch.round(weight.to(scale.dtype) / scale[:, None]), -16, 15).to(dtype=torch.int32)

    for j in range(num_chan_int):
        # pack eight signed 4-bit values into one int32, lowest nibble first
        qweight[:, j] = ((intweight[:, j * 8 + 7] & 0x0f) << 28) \
            | ((intweight[:, j * 8 + 6] & 0x0f) << 24) \
            | ((intweight[:, j * 8 + 5] & 0x0f) << 20) \
            | ((intweight[:, j * 8 + 4] & 0x0f) << 16) \
            | ((intweight[:, j * 8 + 3] & 0x0f) << 12) \
            | ((intweight[:, j * 8 + 2] & 0x0f) << 8) \
            | ((intweight[:, j * 8 + 1] & 0x0f) << 4) \
            | (intweight[:, j * 8] & 0x0f)
    return qweight


def dequant4(qweight: torch.Tensor, scale: torch.Tensor, input: torch.Tensor):
    stream = torch.cuda.current_stream()
    num_row = qweight.size(0)
    num_chan_int = qweight.size(1)
    # 4bit: each int32 channel unpacks to eight values
    num_chan_fp16 = num_chan_int * 8

    out = torch.empty((num_row, num_chan_fp16), dtype=input.dtype, device=qweight.device)

    blockDim = (128, 1, 1)
    gridDim = ((num_chan_int + blockDim[0] - 1) // blockDim[0], num_row, 1)
    if input.dtype == torch.bfloat16:
        kernels.int4_to_bf16(
            gridDim,
            blockDim,
            0,
            stream,
            [ctypes.c_void_p(out.data_ptr()), ctypes.c_void_p(qweight.data_ptr()),
             ctypes.c_void_p(scale.data_ptr()), ctypes.c_int32(num_row),
             ctypes.c_int32(num_chan_int), ctypes.c_int32(num_chan_fp16)],
        )
    elif input.dtype == torch.float16:
        kernels.int4_to_fp16(
            gridDim,
            blockDim,
            0,
            stream,
            [ctypes.c_void_p(out.data_ptr()), ctypes.c_void_p(qweight.data_ptr()),
             ctypes.c_void_p(scale.data_ptr()), ctypes.c_int32(num_row),
             ctypes.c_int32(num_chan_int), ctypes.c_int32(num_chan_fp16)],
        )
    return out


class QLinear(torch.nn.Module):
    def __init__(self, bits: int, weight: torch.Tensor, bias=None):
        super().__init__()
        self.quant_bits = bits
        # one scale per output row, sized so the row's max maps onto the integer range
        self.scale = weight.abs().max(dim=-1).values / ((2 ** (bits - 1)) - 1)
        self.scale = self.scale.to(torch.float32)
        if self.quant_bits == 4:
            self.weight = quant4(weight, self.scale)
        elif self.quant_bits == 8:
            self.weight = torch.round(weight.to(self.scale.dtype) / self.scale[:, None]).to(torch.int8)
        if self.quant_bits == 8:
            self.weight = self.weight.T
        self.bias = None

    def forward(self, input):
        if self.quant_bits == 4:
            assert input.dtype == torch.bfloat16 or input.dtype == torch.float16

        if self.weight.device != input.device:
            self.weight = self.weight.to(input.device)
            self.scale = self.scale.to(input.device)

        if self.quant_bits == 4:
            self.scale = self.scale.to(input.dtype)
            # unpack int4 back to the activation dtype on the GPU, then a plain matmul
            rweight = dequant4(self.weight, self.scale, input).T
            output = torch.matmul(input, rweight)
        elif self.quant_bits == 8:
            rweight = self.weight.to(input.dtype) * self.scale.to(input.dtype)
            output = torch.matmul(input, rweight)
        if self.bias is not None:
            output = output + self.bias
        return output
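quantizer.py is not wired into the fp16 checkpoint above; it provides optional on-the-fly weight quantization. QLinear snapshots a per-row scale, stores the weight as packed int4 or int8, and dequantizes back to the activation dtype inside forward. A minimal usage sketch, assuming a CUDA device; the layer size here is purely illustrative:

import torch

# Quantize a dense projection to int8 and compare against the fp16 reference.
linear = torch.nn.Linear(1024, 1024, bias=False).half().cuda()
qlinear = QLinear(bits=8, weight=linear.weight.detach())

x = torch.randn(2, 1024, dtype=torch.float16, device="cuda")
with torch.no_grad():
    y_ref = linear(x)
    y_int8 = qlinear(x)
print((y_ref - y_int8).abs().max())  # small rounding error from 8-bit quantization

The int8 path needs no custom kernels (dequantization is a scale multiply); the 4-bit path additionally requires the inline CUDA kernels to have loaded (kernels must not be None), since dequant4 dispatches to int4_to_fp16 or int4_to_bf16.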
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "pad_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
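One thing worth noticing in the map above: pad_token reuses the <unk> literal rather than introducing a dedicated padding token. A tiny check, assuming the file is saved locally as special_tokens_map.json:

import json

with open("special_tokens_map.json") as f:
    special = json.load(f)

# pad and unk resolve to the same literal token
assert special["pad_token"]["content"] == special["unk_token"]["content"] == "<unk>"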
tokenization_baichuan.py
ADDED
@@ -0,0 +1,232 @@
# Copyright (c) 2023, Baichuan Intelligent Technology. All rights reserved.

import os
from shutil import copyfile
from typing import Any, Dict, List, Optional, Tuple

import sentencepiece as spm
from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
from transformers.utils import logging


logger = logging.get_logger(__name__)

VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}

PRETRAINED_VOCAB_FILES_MAP = {
    "vocab_file": {},
    "tokenizer_file": {},
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}


class BaichuanTokenizer(PreTrainedTokenizer):
    """
    Construct a Baichuan tokenizer. Based on byte-level Byte-Pair-Encoding.

    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
    model_input_names = ["input_ids", "attention_mask"]

    def __init__(
        self,
        vocab_file,
        unk_token="<unk>",
        bos_token="<s>",
        eos_token="</s>",
        pad_token=None,
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
        eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
        unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
        pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            add_bos_token=add_bos_token,
            add_eos_token=add_eos_token,
            sp_model_kwargs=self.sp_model_kwargs,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)

    def __getstate__(self):
        state = self.__dict__.copy()
        state["sp_model"] = None
        return state

    def __setstate__(self, d):
        self.__dict__ = d
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(self.vocab_file)

    @property
    def vocab_size(self):
        """Returns vocab size"""
        return self.sp_model.get_piece_size()

    def get_vocab(self):
        """Returns vocab as a dict"""
        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Returns a tokenized string."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        current_sub_tokens = []
        out_string = ""
        prev_is_special = False
        for i, token in enumerate(tokens):
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                if not prev_is_special and i != 0:
                    out_string += " "
                out_string += self.sp_model.decode(current_sub_tokens) + token
                prev_is_special = True
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
                prev_is_special = False
        out_string += self.sp_model.decode(current_sub_tokens)
        return out_string

    def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
        """
        Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if not os.path.isdir(save_directory):
            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
            return
        out_vocab_file = os.path.join(
            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
        )

        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            with open(out_vocab_file, "wb") as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file,)

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = bos_token_id + token_ids_0 + eos_token_id

        if token_ids_1 is not None:
            output = output + bos_token_id + token_ids_1 + eos_token_id

        return output

    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
        special tokens using the tokenizer `prepare_for_model` method.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )

        bos_token_id = [1] if self.add_bos_token else []
        eos_token_id = [1] if self.add_eos_token else []

        if token_ids_1 is None:
            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
        return (
            bos_token_id
            + ([0] * len(token_ids_0))
            + eos_token_id
            + bos_token_id
            + ([0] * len(token_ids_1))
            + eos_token_id
        )

    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT
        sequence pair mask has the following format:

        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence | second sequence |
        ```

        if token_ids_1 is None, only returns the first portion of the mask (0s).

        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
        eos_token_id = [self.eos_token_id] if self.add_eos_token else []

        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)

        if token_ids_1 is not None:
            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)

        return output
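A minimal round-trip sketch for the class above. This is a sketch only: it assumes tokenizer.model has already been fetched (it is a Git LFS file, see below) and a transformers version contemporary with this release:

from tokenization_baichuan import BaichuanTokenizer

tok = BaichuanTokenizer(vocab_file="tokenizer.model")

ids = tok("DISC-MedLLM")["input_ids"]
print(ids[0] == tok.bos_token_id)                 # True: add_bos_token defaults to True here
print(tok.decode(ids, skip_special_tokens=True))  # recovers the input text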
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f7d1ab69d25c74644af5c5e4dcd1cc6e96d33783dbd257b6bdea55b643c72813
size 1136765
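tokenizer.model is stored through Git LFS, so the blob above is only a pointer; the actual SentencePiece model is materialized on checkout or download. A quick integrity check of a fetched copy against the pointer's recorded digest and size:

import hashlib
import os

path = "tokenizer.model"
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

assert digest == "f7d1ab69d25c74644af5c5e4dcd1cc6e96d33783dbd257b6bdea55b643c72813"
assert os.path.getsize(path) == 1136765  # size recorded in the pointer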
tokenizer_config.json
ADDED
@@ -0,0 +1,46 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_baichuan.BaichuanTokenizer",
      null
    ]
  },
  "bos_token": {
    "__type": "AddedToken",
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "clean_up_tokenization_spaces": false,
  "eos_token": {
    "__type": "AddedToken",
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "model_max_length": 4096,
  "pad_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  },
  "sp_model_kwargs": {},
  "tokenizer_class": "BaichuanTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": true
  }
}
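Since auto_map binds AutoTokenizer to tokenization_baichuan.BaichuanTokenizer (with null in the fast-tokenizer slot), loading through transformers needs trust_remote_code so the custom module is executed. A minimal sketch; the repo id is illustrative, and a local clone of this repository works the same way:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "Flmc/DISC-MedLLM",      # illustrative: any copy of this repository
    use_fast=False,          # the auto_map registers no fast tokenizer
    trust_remote_code=True,  # required to run tokenization_baichuan.py
)
print(tokenizer.model_max_length)  # 4096, per model_max_length above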