openlamm committed
Commit ae3f331 · 1 Parent(s): 13e2f64

make llm local

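The change below simply points `vicuna_ckpt_path` at a local copy of the LLM instead of the `openlamm/llm_7b_v0` Hub repo; the remaining files in this commit add that checkpoint under `pretrained_ckpt/llm_7b_v0`. For reproducing the same layout outside this repo, a minimal sketch (assuming `huggingface_hub` is installed; this download step is not part of the commit):

from huggingface_hub import snapshot_download

# Mirror the previously remote checkpoint into the local path app.py now expects.
snapshot_download(
    repo_id="openlamm/llm_7b_v0",             # the old, remote vicuna_ckpt_path
    local_dir="./pretrained_ckpt/llm_7b_v0",  # the new, local vicuna_ckpt_path
)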
app.py CHANGED
@@ -1,259 +1,259 @@
 from transformers import AutoModel, AutoTokenizer
 from copy import deepcopy
 import gradio as gr
 import mdtex2html
 from model.openlamm import LAMMPEFTModel
 import torch
 import json
 
 # init the model
 args = {
     'model': 'openllama_peft',
     'imagebind_ckpt_path': '../model_zoo/imagebind_ckpt',
-    'vicuna_ckpt_path': 'openlamm/llm_7b_v0',
+    'vicuna_ckpt_path': './pretrained_ckpt/llm_7b_v0',
     'delta_ckpt_path': './pretrained_ckpt/llm7b_lora32_lamm186k/pytorch_model.pt',
     'stage': 2,
     'max_tgt_len': 128,
     'lora_r': 32,
     'lora_alpha': 32,
     'lora_dropout': 0.1,
     'lora_target_modules': ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
     'vision_type': 'image',
     'vision_feature_type': 'local',
     'num_vision_token': 256,
     'encoder_pretrain': 'clip',
     'system_header': True,
 }
 
 model = LAMMPEFTModel(**args)
 delta_ckpt = torch.load(args['delta_ckpt_path'], map_location=torch.device('cpu'))
 model.load_state_dict(delta_ckpt, strict=False)
 model = model.eval().half().cuda()
 print(f'[!] init the 7b model over ...')
 
 """Override Chatbot.postprocess"""
 
 
 def postprocess(self, y):
     if y is None:
         return []
     for i, (message, response) in enumerate(y):
         y[i] = (
             None if message is None else mdtex2html.convert(message),
             None if response is None else mdtex2html.convert(response),
         )
     return y
 
 
 gr.Chatbot.postprocess = postprocess
 
 
 def parse_text(text):
     """copy from https://github.com/GaiZhenbiao/ChuanhuChatGPT/"""
     lines = text.split("\n")
     lines = [line for line in lines if line != ""]
     count = 0
     for i, line in enumerate(lines):
         if "```" in line:
             count += 1
             items = line.split('`')
             if count % 2 == 1:
                 lines[i] = f'<pre><code class="language-{items[-1]}">'
             else:
                 lines[i] = f'<br></code></pre>'
         else:
             if i > 0:
                 if count % 2 == 1:
                     line = line.replace("`", "\\`")
                     line = line.replace("<", "&lt;")
                     line = line.replace(">", "&gt;")
                     line = line.replace(" ", "&nbsp;")
                     line = line.replace("*", "&ast;")
                     line = line.replace("_", "&lowbar;")
                     line = line.replace("-", "&#45;")
                     line = line.replace(".", "&#46;")
                     line = line.replace("!", "&#33;")
                     line = line.replace("(", "&#40;")
                     line = line.replace(")", "&#41;")
                     line = line.replace("$", "&#36;")
                 lines[i] = "<br>" + line
     text = "".join(lines)
     if text.endswith("##"):
         text = text[:-2]
     return text
 
 
 def re_predict(
     input,
     image_path,
     chatbot,
     max_length,
     top_p,
     temperature,
     history,
     modality_cache,
 ):
     # drop the latest query and answer, then generate again
     q, a = history.pop()
     chatbot.pop()
     return predict(q, image_path, chatbot, max_length, top_p, temperature, history, modality_cache)
 
 
 def predict(
     input,
     image_path,
     chatbot,
     max_length,
     top_p,
     temperature,
     history,
     modality_cache,
 ):
     if image_path is None:
         return [(input, "There is no input data provided! Please upload your data and start the conversation.")]
     else:
         print(f'[!] image path: {image_path}\n')
 
     # prepare the prompt
     prompt_text = ''
     for idx, (q, a) in enumerate(history):
         if idx == 0:
             prompt_text += f'{q}\n### Assistant: {a}\n###'
         else:
             prompt_text += f' Human: {q}\n### Assistant: {a}\n###'
     if len(history) == 0:
         prompt_text += f'{input}'
     else:
         prompt_text += f' Human: {input}'
 
     response = model.generate({
         'prompt': [prompt_text] if not isinstance(prompt_text, list) else prompt_text,
         'image_paths': [image_path] if image_path else [],
         'top_p': top_p,
         'temperature': temperature,
         'max_tgt_len': max_length,
         'modality_embeds': modality_cache
     })
     if isinstance(response, list):
         response = response[0]
     chatbot.append((parse_text(input), parse_text(response)))
     history.append((input, response))
     return chatbot, history, modality_cache
 
 
 def reset_user_input():
     return gr.update(value='')
 
 def reset_dialog():
     return [], []
 
 def reset_state():
     return None, [], [], []
 
 
 with gr.Blocks(scale=4) as demo:
     gr.Image("./images/lamm_title.png", show_label=False, height=50)
     gr.HTML(
         """
         <p>
         <p align="center">
         <font size='4'>
             <a href="https://openlamm.github.io/" target="_blank">🏠 Home Page</a> • <a href="https://github.com/OpenLAMM/LAMM" target="_blank">🌏 Github</a> • <a href="https://arxiv.org/pdf/2306.06687.pdf" target="_blank">📰 Paper</a> • <a href="https://www.youtube.com/watch?v=M7XlIe8hhPk" target="_blank">▶️ YouTube</a> • <a href="https://www.bilibili.com/video/BV1kN411D7kt/?share_source=copy_web&vd_source=ab4c734425ed0114898300f2c037ac0b" target="_blank">📺 Bilibili</a> • <a href="https://opendatalab.com/LAMM" target="_blank">📀 Data</a> • <a href="https://huggingface.co/openlamm" target="_blank">📦 LAMM Models</a>
         </font>
         </p>
         </p>
         """
     )
     # gr.HTML("""<h1>LAMM: Language-Assisted Multi-Modal Instruction-Tuning Dataset, Framework, and Benchmark</h1>""")
     # gr.Markdown(
     #     """
     #     <p>
 
     #     <a href="https://arxiv.org/pdf/2306.06687.pdf" target="_blank"><img src="https://img.shields.io/badge/arxiv-PDF-red"/></a>
 
     #     <a href="https://openlamm.github.io" target="_blank"><img src="https://img.shields.io/badge/LAMM-HomePage-blue"/></a>
 
     #     <a href="https://opendatalab.com/LAMM" target="_blank"><img src="https://img.shields.io/badge/LAMM-Dataset-green"/></a>
 
     #     <a href="https://www.youtube.com/watch?v=M7XlIe8hhPk" target="_blank"><img src="https://img.shields.io/badge/video-Youtube-red"/></a>
 
     #     <a href="https://www.bilibili.com/video/BV1kN411D7kt/?share_source=copy_web&vd_source=ab4c734425ed0114898300f2c037ac0b" target="_blank"><img src="https://img.shields.io/badge/video-Bilibili-blue"/></a>
 
     #     <a href="https://github.com/OpenLAMM/LAMM" target="_blank"><img src="https://img.shields.io/badge/Repo-Github-white"/></a>
 
     #     <a href="https://huggingface.co/openlamm" target="_blank"><img src="https://img.shields.io/badge/Models-huggingface-yellow"/></a>
 
     #     <img src="https://img.shields.io/github/stars/OpenLAMM/LAMM.svg?style=social&label=Star"/>
     #     </p>
     #     Drop your image & Start talking with LAMM models.
     #     """)
 
     with gr.Row(scale=1):
         with gr.Column(scale=1):
             image_path = gr.Image(type="filepath", label="Image", value=None).style(height=600)
 
         chatbot = gr.Chatbot(scale=1).style(height=600)
 
     with gr.Row():
         with gr.Column(scale=4):
             with gr.Column(scale=12):
                 user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(container=False)
             with gr.Column(min_width=32, scale=1):
                 with gr.Row(scale=1):
                     submitBtn = gr.Button("Submit", variant="primary")
                 with gr.Row(scale=1):
                     resubmitBtn = gr.Button("Resubmit", variant="primary")
         with gr.Column(scale=1):
             emptyBtn = gr.Button("Clear History")
             max_length = gr.Slider(0, 600, value=256, step=1.0, label="Maximum length", interactive=True)
             top_p = gr.Slider(0, 1, value=0.01, step=0.01, label="Top P", interactive=True)
             temperature = gr.Slider(0, 1, value=0.9, step=0.01, label="Temperature", interactive=True)
 
     history = gr.State([])
     modality_cache = gr.State([])
 
     submitBtn.click(
         predict, [
             user_input,
             image_path,
             chatbot,
             max_length,
             top_p,
             temperature,
             history,
             modality_cache,
         ], [
             chatbot,
             history,
             modality_cache
         ],
         show_progress=True
     )
 
     resubmitBtn.click(
         re_predict, [
             user_input,
             image_path,
             chatbot,
             max_length,
             top_p,
             temperature,
             history,
             modality_cache,
         ], [
             chatbot,
             history,
             modality_cache
         ],
         show_progress=True
     )
 
     submitBtn.click(reset_user_input, [], [user_input])
     emptyBtn.click(reset_state, outputs=[
         image_path,
         chatbot,
         history,
         modality_cache
     ], show_progress=True)
 
 demo.queue().launch(enable_queue=True)
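For reference, `predict()` above flattens the dialogue into one string before calling `model.generate`. A standalone sketch of the resulting format (hypothetical question/answer strings; no model required):

# Illustrative only: reproduce the prompt layout built inside predict()
history = [("What is in the image?", "A dog playing in a park.")]
prompt_text = ''
for idx, (q, a) in enumerate(history):
    if idx == 0:
        prompt_text += f'{q}\n### Assistant: {a}\n###'
    else:
        prompt_text += f' Human: {q}\n### Assistant: {a}\n###'
prompt_text += ' Human: What breed is the dog?'
print(prompt_text)
# What is in the image?
# ### Assistant: A dog playing in a park.
# ### Human: What breed is the dog?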
pretrained_ckpt/llm_7b_v0/config.json ADDED
@@ -0,0 +1,24 @@
+{
+  "_name_or_path": "/mnt/petrelfs/wangjiong/unified_benchmark/pretrain/huggingface/models--lmsys--vicuna-7b-delta-v0/snapshots/f902a2f7e2ca5dfeedf40a0220320e50d2d4fa2a/",
+  "architectures": [
+    "LlavaLlamaForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 2048,
+  "max_sequence_length": 2048,
+  "model_type": "llava",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-06,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.28.0.dev0",
+  "use_cache": true,
+  "vocab_size": 32001
+}
pretrained_ckpt/llm_7b_v0/generation_config.json ADDED
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.28.0.dev0"
+}
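Once the checkpoint directory is in place, these decoding defaults can be read back for a quick sanity check; a sketch assuming `transformers` >= 4.26 (which introduced `GenerationConfig`):

from transformers import GenerationConfig

# Reads generation_config.json from the local checkpoint directory.
gen_cfg = GenerationConfig.from_pretrained("./pretrained_ckpt/llm_7b_v0")
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id, gen_cfg.pad_token_id)  # 1 2 0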
pretrained_ckpt/llm_7b_v0/pytorch_model-00001-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef1b2c502e2eab32176400bd8af8636163619fb04e65c0c0fdea58f1cbe21807
+size 9976642750
pretrained_ckpt/llm_7b_v0/pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f3789f864bf21ca0733d782022e3656759728151fab435e6799696124099a9a
+size 3500323731
pretrained_ckpt/llm_7b_v0/pytorch_model.bin.index.json ADDED
@@ -0,0 +1,330 @@
+{
+  "metadata": {
+    "total_size": 13476855808
+  },
+  "weight_map": {
+    "lm_head.weight": "pytorch_model-00002-of-00002.bin",
+    "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin",
+    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
+    "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
+    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
+    "model.norm.weight": "pytorch_model-00002-of-00002.bin"
+  }
+}
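The index above tells `transformers` which of the two shards holds each parameter. It can be inspected with the standard library alone; a small sketch using the paths added in this commit:

import json

# Load the shard index that maps each parameter name to its .bin file.
with open("./pretrained_ckpt/llm_7b_v0/pytorch_model.bin.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])  # 13476855808 bytes (~12.6 GiB)
print(sorted(set(index["weight_map"].values())))
# ['pytorch_model-00001-of-00002.bin', 'pytorch_model-00002-of-00002.bin']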
pretrained_ckpt/llm_7b_v0/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
pretrained_ckpt/llm_7b_v0/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
pretrained_ckpt/llm_7b_v0/tokenizer_config.json ADDED
@@ -0,0 +1,31 @@
+{
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "special_tokens_map_file": "/mnt/petrelfs/wangjiong/unified_benchmark/pretrain/huggingface/models--lmsys--vicuna-7b-delta-v0/snapshots/f902a2f7e2ca5dfeedf40a0220320e50d2d4fa2a/special_tokens_map.json",
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
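With the files above in place, the tokenizer loads straight from the local directory; a minimal check (assuming `sentencepiece` is installed, since `tokenizer.model` is a SentencePiece vocabulary):

from transformers import AutoTokenizer

# use_fast=False selects the LlamaTokenizer declared in tokenizer_config.json.
tokenizer = AutoTokenizer.from_pretrained("./pretrained_ckpt/llm_7b_v0", use_fast=False)
print(tokenizer.bos_token, tokenizer.eos_token)  # <s> </s>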