Upload folder using huggingface_hub
Browse files- README.md +462 -0
- SYSTEM_PROMPT.txt +18 -0
- config.json +36 -0
- consolidated.safetensors.index.json +802 -0
- generation_config.json +6 -0
- huggingface-metadata.txt +107 -0
- measurement.json +0 -0
- model.safetensors.index.json +802 -0
- output-00001-of-00005.safetensors +3 -0
- output-00002-of-00005.safetensors +3 -0
- output-00003-of-00005.safetensors +3 -0
- output-00004-of-00005.safetensors +3 -0
- output-00005-of-00005.safetensors +3 -0
- params.json +11 -0
- special_tokens_map.json +23 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer.model.v7 +0 -0
- tokenizer_config.json +0 -0
README.md
ADDED
@@ -0,0 +1,462 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
- fr
|
5 |
+
- de
|
6 |
+
- es
|
7 |
+
- it
|
8 |
+
- pt
|
9 |
+
- zh
|
10 |
+
- ja
|
11 |
+
- ru
|
12 |
+
- ko
|
13 |
+
license: other
|
14 |
+
license_name: mrl
|
15 |
+
inference: false
|
16 |
+
license_link: https://mistral.ai/licenses/MRL-0.1.md
|
17 |
+
extra_gated_prompt: >-
|
18 |
+
# Mistral AI Research License
|
19 |
+
|
20 |
+
If You want to use a Mistral Model, a Derivative or an Output for any purpose that is not expressly authorized under this Agreement, You must request a license from Mistral AI, which Mistral AI may grant to You in Mistral AI's sole discretion. To discuss such a license, please contact Mistral AI via the website contact form: https://mistral.ai/contact/
|
21 |
+
|
22 |
+
## 1. Scope and acceptance
|
23 |
+
|
24 |
+
**1.1. Scope of the Agreement.** This Agreement applies to any use, modification, or Distribution of any Mistral Model by You, regardless of the source You obtained a copy of such Mistral Model.
|
25 |
+
|
26 |
+
**1.2. Acceptance.** By accessing, using, modifying, Distributing a Mistral Model, or by creating, using or distributing a Derivative of the Mistral Model, You agree to be bound by this Agreement.
|
27 |
+
|
28 |
+
**1.3. Acceptance on behalf of a third-party.** If You accept this Agreement on behalf of Your employer or another person or entity, You warrant and represent that You have the authority to act and accept this Agreement on their behalf. In such a case, the word "You" in this Agreement will refer to Your employer or such other person or entity.
|
29 |
+
|
30 |
+
## 2. License
|
31 |
+
|
32 |
+
**2.1. Grant of rights**. Subject to Section 3 below, Mistral AI hereby grants You a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable, limited license to use, copy, modify, and Distribute under the conditions provided in Section 2.2 below, the Mistral Model and any Derivatives made by or for Mistral AI and to create Derivatives of the Mistral Model.
|
33 |
+
|
34 |
+
**2.2. Distribution of Mistral Model and Derivatives made by or for Mistral AI.** Subject to Section 3 below, You may Distribute copies of the Mistral Model and/or Derivatives made by or for Mistral AI, under the following conditions:
|
35 |
+
You must make available a copy of this Agreement to third-party recipients of the Mistral Models and/or Derivatives made by or for Mistral AI you Distribute, it being specified that any rights to use the Mistral Models and/or Derivatives made by or for Mistral AI shall be directly granted by Mistral AI to said third-party recipients pursuant to the Mistral AI Research License agreement executed between these parties;
|
36 |
+
You must retain in all copies of the Mistral Models the following attribution notice within a "Notice" text file distributed as part of such copies: "Licensed by Mistral AI under the Mistral AI Research License".
|
37 |
+
|
38 |
+
**2.3. Distribution of Derivatives made by or for You.** Subject to Section 3 below, You may Distribute any Derivatives made by or for You under additional or different terms and conditions, provided that:
|
39 |
+
In any event, the use and modification of Mistral Model and/or Derivatives made by or for Mistral AI shall remain governed by the terms and conditions of this Agreement;
|
40 |
+
You include in any such Derivatives made by or for You prominent notices stating that You modified the concerned Mistral Model; and
|
41 |
+
Any terms and conditions You impose on any third-party recipients relating to Derivatives made by or for You shall neither limit such third-party recipients' use of the Mistral Model or any Derivatives made by or for Mistral AI in accordance with the Mistral AI Research License nor conflict with any of its terms and conditions.
|
42 |
+
|
43 |
+
## 3. Limitations
|
44 |
+
|
45 |
+
**3.1. Misrepresentation.** You must not misrepresent or imply, through any means, that the Derivatives made by or for You and/or any modified version of the Mistral Model You Distribute under your name and responsibility is an official product of Mistral AI or has been endorsed, approved or validated by Mistral AI, unless You are authorized by Us to do so in writing.
|
46 |
+
|
47 |
+
**3.2. Usage Limitation.** You shall only use the Mistral Models, Derivatives (whether or not created by Mistral AI) and Outputs for Research Purposes.
|
48 |
+
|
49 |
+
## 4. Intellectual Property
|
50 |
+
|
51 |
+
**4.1. Trademarks.** No trademark licenses are granted under this Agreement, and in connection with the Mistral Models, You may not use any name or mark owned by or associated with Mistral AI or any of its affiliates, except (i) as required for reasonable and customary use in describing and Distributing the Mistral Models and Derivatives made by or for Mistral AI and (ii) for attribution purposes as required by this Agreement.
|
52 |
+
|
53 |
+
**4.2. Outputs.** We claim no ownership rights in and to the Outputs. You are solely responsible for the Outputs You generate and their subsequent uses in accordance with this Agreement. Any Outputs shall be subject to the restrictions set out in Section 3 of this Agreement.
|
54 |
+
|
55 |
+
**4.3. Derivatives.** By entering into this Agreement, You accept that any Derivatives that You may create or that may be created for You shall be subject to the restrictions set out in Section 3 of this Agreement.
|
56 |
+
|
57 |
+
## 5. Liability
|
58 |
+
|
59 |
+
**5.1. Limitation of liability.** In no event, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall Mistral AI be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this Agreement or out of the use or inability to use the Mistral Models and Derivatives (including but not limited to damages for loss of data, loss of goodwill, loss of expected profit or savings, work stoppage, computer failure or malfunction, or any damage caused by malware or security breaches), even if Mistral AI has been advised of the possibility of such damages.
|
60 |
+
|
61 |
+
**5.2. Indemnification.** You agree to indemnify and hold harmless Mistral AI from and against any claims, damages, or losses arising out of or related to Your use or Distribution of the Mistral Models and Derivatives.
|
62 |
+
|
63 |
+
## 6. Warranty
|
64 |
+
|
65 |
+
**6.1. Disclaimer.** Unless required by applicable law or prior agreed to by Mistral AI in writing, Mistral AI provides the Mistral Models and Derivatives on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. Mistral AI does not represent nor warrant that the Mistral Models and Derivatives will be error-free, meet Your or any third party's requirements, be secure or will allow You or any third party to achieve any kind of result or generate any kind of content. You are solely responsible for determining the appropriateness of using or Distributing the Mistral Models and Derivatives and assume any risks associated with Your exercise of rights under this Agreement.
|
66 |
+
|
67 |
+
## 7. Termination
|
68 |
+
|
69 |
+
**7.1. Term.** This Agreement is effective as of the date of your acceptance of this Agreement or access to the concerned Mistral Models or Derivatives and will continue until terminated in accordance with the following terms.
|
70 |
+
|
71 |
+
**7.2. Termination.** Mistral AI may terminate this Agreement at any time if You are in breach of this Agreement. Upon termination of this Agreement, You must cease to use all Mistral Models and Derivatives and shall permanently delete any copy thereof. The following provisions, in their relevant parts, will survive any termination or expiration of this Agreement, each for the duration necessary to achieve its own intended purpose (e.g. the liability provision will survive until the end of the applicable limitation period):Sections 5 (Liability), 6(Warranty), 7 (Termination) and 8 (General Provisions).
|
72 |
+
|
73 |
+
**7.3. Litigation.** If You initiate any legal action or proceedings against Us or any other entity (including a cross-claim or counterclaim in a lawsuit), alleging that the Model or a Derivative, or any part thereof, infringe upon intellectual property or other rights owned or licensable by You, then any licenses granted to You under this Agreement will immediately terminate as of the date such legal action or claim is filed or initiated.
|
74 |
+
|
75 |
+
## 8. General provisions
|
76 |
+
|
77 |
+
**8.1. Governing laws.** This Agreement will be governed by the laws of France, without regard to choice of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement.
|
78 |
+
|
79 |
+
**8.2. Competent jurisdiction.** The courts of Paris shall have exclusive jurisdiction of any dispute arising out of this Agreement.
|
80 |
+
|
81 |
+
**8.3. Severability.** If any provision of this Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall be unaffected thereby and remain valid as if such provision had not been set forth herein.
|
82 |
+
|
83 |
+
## 9. Definitions
|
84 |
+
|
85 |
+
"Agreement": means this Mistral AI Research License agreement governing the access, use, and Distribution of the Mistral Models, Derivatives and Outputs.
|
86 |
+
|
87 |
+
"Derivative": means any (i) modified version of the Mistral Model (including but not limited to any customized or fine-tuned version thereof), (ii) work based on the Mistral Model, or (iii) any other derivative work thereof.
|
88 |
+
|
89 |
+
"Distribution", "Distributing", "Distribute" or "Distributed": means supplying, providing or making available, by any means, a copy of the Mistral Models and/or the Derivatives as the case may be, subject to Section 3 of this Agreement.
|
90 |
+
|
91 |
+
"Mistral AI", "We" or "Us": means Mistral AI, a French société par actions simplifiée registered in the Paris commercial registry under the number 952 418 325, and having its registered seat at 15, rue des Halles, 75001 Paris.
|
92 |
+
|
93 |
+
"Mistral Model": means the foundational large language model(s), and its elements which include algorithms, software, instructed checkpoints, parameters, source code (inference code, evaluation code and, if applicable, fine-tuning code) and any other elements associated thereto made available by Mistral AI under this Agreement, including, if any, the technical documentation, manuals and instructions for the use and operation thereof.
|
94 |
+
|
95 |
+
"Research Purposes": means any use of a Mistral Model, Derivative, or Output that is solely for (a) personal, scientific or academic research, and (b) for non-profit and non-commercial purposes, and not directly or indirectly connected to any commercial activities or business operations. For illustration purposes, Research Purposes does not include (1) any usage of the Mistral Model, Derivative or Output by individuals or contractors employed in or engaged by companies in the context of (a) their daily tasks, or (b) any activity (including but not limited to any testing or proof-of-concept) that is intended to generate revenue, nor (2) any Distribution by a commercial entity of the Mistral Model, Derivative or Output whether in return for payment or free of charge, in any medium or form, including but not limited to through a hosted or managed service (e.g. SaaS, cloud instances, etc.), or behind a software layer.
|
96 |
+
|
97 |
+
"Outputs": means any content generated by the operation of the Mistral Models or the Derivatives from a prompt (i.e., text instructions) provided by users. For the avoidance of doubt, Outputs do not include any components of a Mistral Models, such as any fine-tuned versions of the Mistral Models, the weights, or parameters.
|
98 |
+
|
99 |
+
"You": means the individual or entity entering into this Agreement with Mistral AI.
|
100 |
+
|
101 |
+
|
102 |
+
*Mistral AI processes your personal data below to provide the model and enforce its license. If you are affiliated with a commercial entity, we may also send you communications about our models. For more information on your rights and data handling, please see our <a href="https://mistral.ai/terms/">privacy policy</a>.*
|
103 |
+
extra_gated_fields:
|
104 |
+
First Name: text
|
105 |
+
Last Name: text
|
106 |
+
Country: country
|
107 |
+
Affiliation: text
|
108 |
+
Job title: text
|
109 |
+
I understand that I can only use the model, any derivative versions and their outputs for non-commercial research purposes: checkbox
|
110 |
+
I understand that if I am a commercial entity, I am not permitted to use or distribute the model internally or externally, or expose it in my own offerings without a commercial license: checkbox
|
111 |
+
I understand that if I upload the model, or any derivative version, on any platform, I must include the Mistral Research License: checkbox
|
112 |
+
I understand that for commercial use of the model, I can contact Mistral or use the Mistral AI API on la Plateforme or any of our cloud provider partners: checkbox
|
113 |
+
? By clicking Submit below I accept the terms of the license and acknowledge that
|
114 |
+
the information I provide will be collected stored processed and shared in accordance
|
115 |
+
with the Mistral Privacy Policy
|
116 |
+
: checkbox
|
117 |
+
geo: ip_location
|
118 |
+
extra_gated_description: >-
|
119 |
+
Mistral AI processes your personal data below to provide the model and enforce its license. If you are affiliated with a commercial entity, we may also send you communications about our models. For more information on your rights and data handling, please see our <a href="https://mistral.ai/terms/">privacy policy</a>.
|
120 |
+
extra_gated_button_content: Submit
|
121 |
+
library_name: vllm
|
122 |
+
---
|
123 |
+
|
124 |
+
# Model Card for Mistral-Large-Instruct-2411
|
125 |
+
|
126 |
+
Mistral-Large-Instruct-2411 is an advanced dense Large Language Model (LLM) of 123B parameters with state-of-the-art reasoning, knowledge and coding capabilities extending [Mistral-Large-Instruct-2407](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407) with better Long Context, Function Calling and System Prompt.
|
127 |
+
|
128 |
+
## Key features
|
129 |
+
- **Multi-lingual by design:** Dozens of languages supported, including English, French, German, Spanish, Italian, Chinese, Japanese, Korean, Portuguese, Dutch and Polish.
|
130 |
+
- **Proficient in coding:** Trained on 80+ coding languages such as Python, Java, C, C++, JavaScript, and Bash. Also trained on more specific languages such as Swift and Fortran.
|
131 |
+
- **Agent-centric:** Best-in-class agentic capabilities with native function calling and JSON outputting.
|
132 |
+
- **Advanced Reasoning:** State-of-the-art mathematical and reasoning capabilities.
|
133 |
+
- **Mistral Research License:** Allows usage and modification for non-commercial usages.
|
134 |
+
- **Large Context:** A large 128k context window.
|
135 |
+
- **Robust Context Adherence:** Ensures strong adherence for RAG and large context applications.
|
136 |
+
- **System Prompt:** Maintains strong adherence and support for more reliable system prompts.
|
137 |
+
|
138 |
+
### System Prompt
|
139 |
+
We appreciate the feedback received from our community regarding our system prompt handling.
|
140 |
+
In response, we have implemented stronger support for system prompts.
|
141 |
+
To achieve optimal results, we recommend always including a system prompt that clearly outlines the bot's purpose, even if it is minimal.
|
142 |
+
|
143 |
+
### Basic Instruct Template (V7)
|
144 |
+
|
145 |
+
```
|
146 |
+
<s>[SYSTEM_PROMPT] <system prompt>[/SYSTEM_PROMPT][INST] <user message>[/INST] <assistant response></s>[INST] <user message>[/INST]
|
147 |
+
```
|
148 |
+
|
149 |
+
**Be careful with subtle missing or trailing white spaces!**
|
150 |
+
|
151 |
+
*Please make sure to use [mistral-common](https://github.com/mistralai/mistral-common) as the source of truth*
|
152 |
+
|
153 |
+
## Usage
|
154 |
+
|
155 |
+
The model can be used with the following frameworks:
|
156 |
+
|
157 |
+
- [`vllm`](https://github.com/vllm-project/vllm): See [here](#vllm)
|
158 |
+
|
159 |
+
### vLLM
|
160 |
+
|
161 |
+
We recommend using this model with the [vLLM library](https://github.com/vllm-project/vllm)
|
162 |
+
to implement production-ready inference pipelines.
|
163 |
+
|
164 |
+
**_Installation_**
|
165 |
+
|
166 |
+
Make sure you install [`vLLM >= v0.6.4.post1`](https://github.com/vllm-project/vllm/releases/tag/v0.6.4.post1):
|
167 |
+
|
168 |
+
```
|
169 |
+
pip install --upgrade vllm
|
170 |
+
```
|
171 |
+
|
172 |
+
Also make sure you have [`mistral_common >= 1.5.0`](https://github.com/mistralai/mistral-common/releases/tag/v1.5.0) installed:
|
173 |
+
|
174 |
+
```
|
175 |
+
pip install --upgrade mistral_common
|
176 |
+
```
|
177 |
+
|
178 |
+
You can also use a ready-to-go [docker image](https://github.com/vllm-project/vllm/blob/main/Dockerfile), or pull one from the [docker hub](https://hub.docker.com/layers/vllm/vllm-openai/latest/images/sha256-55a88146a4da0b6e193431b5b1d3492dfd7bebdc16919df4d031273e85a6157c?context=explore).
|
179 |
+
|
180 |
+
### Server
|
181 |
+
|
182 |
+
We recommend that you use Mistral-Large-Instruct-2411 in a server/client setting.
|
183 |
+
|
184 |
+
1. Spin up a server:
|
185 |
+
|
186 |
+
|
187 |
+
```
|
188 |
+
vllm serve mistralai/Mistral-Large-Instruct-2411 --tokenizer_mode mistral --config_format mistral --load_format mistral --tensor_parallel_size 8
|
189 |
+
```
|
190 |
+
|
191 |
+
**Note:** Running Mistral-Large-Instruct-2411 on GPU requires over 300 GB of GPU RAM.
|
192 |
+
|
193 |
+
|
194 |
+
2. To query the server from a client, you can use a simple Python snippet.
|
195 |
+
|
196 |
+
```py
|
197 |
+
import requests
|
198 |
+
import json
|
199 |
+
from huggingface_hub import hf_hub_download
|
200 |
+
from datetime import datetime, timedelta
|
201 |
+
|
202 |
+
url = "http://<your-server>:8000/v1/chat/completions"
|
203 |
+
headers = {"Content-Type": "application/json", "Authorization": "Bearer token"}
|
204 |
+
|
205 |
+
model = "mistralai/Mistral-Large-Instruct-2411"
|
206 |
+
|
207 |
+
|
208 |
+
def load_system_prompt(repo_id: str, filename: str) -> str:
|
209 |
+
file_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
210 |
+
with open(file_path, "r") as file:
|
211 |
+
system_prompt = file.read()
|
212 |
+
today = datetime.today().strftime("%Y-%m-%d")
|
213 |
+
yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
|
214 |
+
model_name = repo_id.split("/")[-1]
|
215 |
+
return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
|
216 |
+
|
217 |
+
|
218 |
+
SYSTEM_PROMPT = load_system_prompt(model, "SYSTEM_PROMPT.txt")
|
219 |
+
|
220 |
+
|
221 |
+
messages = [
|
222 |
+
{"role": "system", "content": SYSTEM_PROMPT + "\n\nThink step by step. You're a math genius."},
|
223 |
+
{
|
224 |
+
"role": "user",
|
225 |
+
"content": "Think of four random numbers. Then add, substract or multiply them so that the solution is 10. If it's not possible, say it."
|
226 |
+
},
|
227 |
+
]
|
228 |
+
|
229 |
+
data = {"model": model, "messages": messages}
|
230 |
+
|
231 |
+
response = requests.post(url, headers=headers, data=json.dumps(data))
|
232 |
+
print(response.json()["choices"][0]["message"]["content"])
|
233 |
+
# Sure, let's start by thinking of four random numbers. For example, let's take 3, 5, 2, and 1.
|
234 |
+
#
|
235 |
+
# Now, we need to find a combination of addition, subtraction, or multiplication that results in 10.
|
236 |
+
|
237 |
+
# Let's try:
|
238 |
+
|
239 |
+
# \[ 3 + 5 + 2 - 1 = 9 \]
|
240 |
+
|
241 |
+
# This doesn't work. Let's try another combination:
|
242 |
+
|
243 |
+
# \[ 3 \times 2 + 5 - 1 = 6 + 5 - 1 = 10 \]
|
244 |
+
|
245 |
+
# This works! So, with the numbers 3, 5, 2, and 1, we can achieve the result 10 by performing the operations \( 3 \times 2 + 5 - 1 \).
|
246 |
+
```
|
247 |
+
|
248 |
+
### Offline
|
249 |
+
|
250 |
+
```py
|
251 |
+
from vllm import LLM
|
252 |
+
from vllm.sampling_params import SamplingParams
|
253 |
+
from huggingface_hub import hf_hub_download
|
254 |
+
from datetime import datetime, timedelta
|
255 |
+
|
256 |
+
model_name = "mistralai/Mistral-Large-Instruct-2411"
|
257 |
+
|
258 |
+
def load_system_prompt(repo_id: str, filename: str) -> str:
|
259 |
+
file_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
260 |
+
with open(file_path, 'r') as file:
|
261 |
+
system_prompt = file.read()
|
262 |
+
today = datetime.today().strftime('%Y-%m-%d')
|
263 |
+
yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
|
264 |
+
model_name = repo_id.split("/")[-1]
|
265 |
+
return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
|
266 |
+
|
267 |
+
|
268 |
+
SYSTEM_PROMPT = load_system_prompt(model_name, "SYSTEM_PROMPT.txt") + "\n\nThink step by step. You're a math genius."
|
269 |
+
|
270 |
+
user_prompt = "Without browsing the web, how many days ago was Mistral founded?"
|
271 |
+
|
272 |
+
messages = [
|
273 |
+
{
|
274 |
+
"role": "system",
|
275 |
+
"content": SYSTEM_PROMPT
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"role": "user",
|
279 |
+
"content": user_prompt
|
280 |
+
},
|
281 |
+
]
|
282 |
+
|
283 |
+
# note that running this model on GPU requires over 300 GB of GPU RAM
|
284 |
+
llm = LLM(model=model_name, tokenizer_mode="mistral", tensor_parallel_size=8)
|
285 |
+
|
286 |
+
sampling_params = SamplingParams(max_tokens=512)
|
287 |
+
|
288 |
+
outputs = llm.chat(messages, sampling_params=sampling_params)
|
289 |
+
|
290 |
+
print(outputs[0].outputs[0].text)
|
291 |
+
# I don't have real-time web browsing capabilities or access to current data, but I can help you calculate the number of days based on the information I have.
|
292 |
+
#
|
293 |
+
#Mistral AI was founded in April 2023. To determine how many days ago that was from today's date, November 18, 2024, we need to calculate the total number of days between April 2023 and November 2024.
|
294 |
+
#
|
295 |
+
#Here's the step-by-step calculation:
|
296 |
+
#
|
297 |
+
#1. **Days from April 2023 to December 2023:**
|
298 |
+
# - April 2023: 30 days (April has 30 days)
|
299 |
+
# - May 2023: 31 days
|
300 |
+
# - June 2023: 30 days
|
301 |
+
# - July 2023: 31 days
|
302 |
+
# - August 2023: 31 days
|
303 |
+
# - September 2023: 30 days
|
304 |
+
# - October 2023: 31 days
|
305 |
+
# - November 2023: 30 days
|
306 |
+
# - December 2023: 31 days
|
307 |
+
#
|
308 |
+
# Total days in 2023 from April to December = 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31 = 275 days
|
309 |
+
#
|
310 |
+
#2. **Days from January 2024 to November 18, 2024:**
|
311 |
+
# - January 2024: 31 days
|
312 |
+
# - February 2024: 29 days (2024 is a leap year)
|
313 |
+
# - March 2024: 31 days
|
314 |
+
# - April 2024: 30 days
|
315 |
+
# - May 2024: 31 days
|
316 |
+
# - June 2024: 30 days
|
317 |
+
# - July 2024: 31 days
|
318 |
+
# - August 2024: 31 days
|
319 |
+
# - September 2024: 30 days
|
320 |
+
# - October 2024: 31 days
|
321 |
+
# - November 2024 (up to the 18th): 18 days
|
322 |
+
#
|
323 |
+
# Total days in 2024 from January to November 18 = 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 18 = 323 days
|
324 |
+
#
|
325 |
+
#3. **Total days from April 2023 to November 18, 2024:**
|
326 |
+
# Total days = 275 days (2023) + 323 days (2024) = 598 days
|
327 |
+
#
|
328 |
+
#Therefore, Mistral AI was founded 598 days ago from today's date, November 18, 2024.
|
329 |
+
```
|
330 |
+
|
331 |
+
### Improved Function Calling
|
332 |
+
|
333 |
+
Mistral-Large-Instruct-2411 has much-improved function calling capabilities that are fully supported
|
334 |
+
using [`mistral_common >= 1.5.0`](https://github.com/mistralai/mistral-common/releases/tag/v1.5.0) and [`vLLM >= v0.6.4.post1`](https://github.com/vllm-project/vllm/releases/tag/v0.6.4.post1).
|
335 |
+
|
336 |
+
Make sure to serve the model with the following flags in vLLM:
|
337 |
+
|
338 |
+
```
|
339 |
+
vllm serve mistralai/Mistral-Large-Instruct-2411 --tokenizer_mode mistral --tensor-parallel-size 8 --tool-call-parser mistral --enable-auto-tool-choice
|
340 |
+
```
|
341 |
+
|
342 |
+
<details>
|
343 |
+
<summary>Example</summary>
|
344 |
+
|
345 |
+
```py
|
346 |
+
import requests
|
347 |
+
import json
|
348 |
+
from huggingface_hub import hf_hub_download
|
349 |
+
from datetime import datetime, timedelta
|
350 |
+
|
351 |
+
url = "http://<your-server>:8000/v1/chat/completions"
|
352 |
+
headers = {"Content-Type": "application/json", "Authorization": "Bearer token"}
|
353 |
+
|
354 |
+
model = "mistralai/Mistral-Large-Instruct-2411"
|
355 |
+
|
356 |
+
|
357 |
+
def load_system_prompt(repo_id: str, filename: str) -> str:
|
358 |
+
file_path = hf_hub_download(repo_id=repo_id, filename=filename)
|
359 |
+
with open(file_path, "r") as file:
|
360 |
+
system_prompt = file.read()
|
361 |
+
today = datetime.today().strftime("%Y-%m-%d")
|
362 |
+
yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
|
363 |
+
model_name = repo_id.split("/")[-1]
|
364 |
+
return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
|
365 |
+
|
366 |
+
|
367 |
+
SYSTEM_PROMPT = load_system_prompt(model, "SYSTEM_PROMPT.txt")
|
368 |
+
|
369 |
+
|
370 |
+
tools = [
|
371 |
+
{
|
372 |
+
"type": "function",
|
373 |
+
"function": {
|
374 |
+
"name": "get_current_weather",
|
375 |
+
"description": "Get the current weather in a given location",
|
376 |
+
"parameters": {
|
377 |
+
"type": "object",
|
378 |
+
"properties": {
|
379 |
+
"city": {
|
380 |
+
"type": "string",
|
381 |
+
"description": "The city to find the weather for, e.g. 'San Francisco'",
|
382 |
+
},
|
383 |
+
"state": {
|
384 |
+
"type": "string",
|
385 |
+
"description": "The state abbreviation, e.g. 'CA' for California",
|
386 |
+
},
|
387 |
+
"unit": {
|
388 |
+
"type": "string",
|
389 |
+
"description": "The unit for temperature",
|
390 |
+
"enum": ["celsius", "fahrenheit"],
|
391 |
+
},
|
392 |
+
},
|
393 |
+
"required": ["city", "state", "unit"],
|
394 |
+
},
|
395 |
+
},
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"type": "function",
|
399 |
+
"function": {
|
400 |
+
"name": "rewrite",
|
401 |
+
"description": "Rewrite a given text for improved clarity",
|
402 |
+
"parameters": {
|
403 |
+
"type": "object",
|
404 |
+
"properties": {
|
405 |
+
"text": {
|
406 |
+
"type": "string",
|
407 |
+
"description": "The input text to rewrite",
|
408 |
+
}
|
409 |
+
},
|
410 |
+
},
|
411 |
+
},
|
412 |
+
},
|
413 |
+
]
|
414 |
+
|
415 |
+
messages = [
|
416 |
+
{"role": "system", "content": SYSTEM_PROMPT},
|
417 |
+
{
|
418 |
+
"role": "user",
|
419 |
+
"content": "Could you please make the below article more concise?\n\nOpenAI is an artificial intelligence research laboratory consisting of the non-profit OpenAI Incorporated and its for-profit subsidiary corporation OpenAI Limited Partnership.",
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"role": "assistant",
|
423 |
+
"content": "",
|
424 |
+
"tool_calls": [
|
425 |
+
{
|
426 |
+
"id": "bbc5b7ede",
|
427 |
+
"type": "function",
|
428 |
+
"function": {
|
429 |
+
"name": "rewrite",
|
430 |
+
"arguments": '{"text": "OpenAI is an artificial intelligence research laboratory consisting of the non-profit OpenAI Incorporated and its for-profit subsidiary corporation OpenAI Limited Partnership."}',
|
431 |
+
},
|
432 |
+
}
|
433 |
+
],
|
434 |
+
},
|
435 |
+
{
|
436 |
+
"role": "tool",
|
437 |
+
"content": '{"action":"rewrite","outcome":"OpenAI is a FOR-profit company."}',
|
438 |
+
"tool_call_id": "bbc5b7ede",
|
439 |
+
"name": "rewrite",
|
440 |
+
},
|
441 |
+
{
|
442 |
+
"role": "assistant",
|
443 |
+
"content": "---\n\nOpenAI is a FOR-profit company.",
|
444 |
+
},
|
445 |
+
{
|
446 |
+
"role": "user",
|
447 |
+
"content": "Can you tell me what the temperature will be in Dallas, in Fahrenheit?",
|
448 |
+
},
|
449 |
+
]
|
450 |
+
|
451 |
+
data = {"model": model, "messages": messages, "tools": tools}
|
452 |
+
|
453 |
+
response = requests.post(url, headers=headers, data=json.dumps(data))
|
454 |
+
print(response.json()["choices"][0]["message"]["tool_calls"])
|
455 |
+
# [{'id': '8PdihwL6d', 'type': 'function', 'function': {'name': 'get_current_weather', 'arguments': '{"city": "Dallas", "state": "TX", "unit": "fahrenheit"}'}}]
|
456 |
+
```
|
457 |
+
|
458 |
+
</details>
|
459 |
+
|
460 |
+
## The Mistral AI Team
|
461 |
+
|
462 |
+
Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Alok Kothari, Antoine Roux, Arthur Mensch, Audrey Herblin-Stoop, Augustin Garreau, Austin Birky, Bam4d, Baptiste Bout, Baudouin de Monicault, Blanche Savary, Carole Rambaud, Caroline Feldman, Devendra Singh Chaplot, Diego de las Casas, Diogo Costa, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger, Gaspard Blanchet, Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona, Henri Roussez, Hichem Sattouf, Ian Mack, Jean-Malo Delignon, Jessica Chudnovsky, Justus Murke, Kartik Khandelwal, Lawrence Stewart, Louis Martin, Louis Ternon, Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat, Marie Torelli, Marie-Anne Lachaux, Marjorie Janiewicz, Mickaël Seznec, Nicolas Schuhl, Niklas Muhs, Olivier de Garrigues, Patrick von Platen, Paul Jacob, Pauline Buche, Pavan Kumar Reddy, Perry Savas, Pierre Stock, Romain Sauvestre, Sagar Vaze, Sandeep Subramanian, Saurabh Garg, Sophia Yang, Szymon Antoniak, Teven Le Scao, Thibault Schueller, Thibaut Lavril, Thomas Wang, Théophile Gervet, Timothée Lacroix, Valera Nemychnikova, Wendy Shang, William El Sayed, William Marshall
|
SYSTEM_PROMPT.txt
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are {name}, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.
|
2 |
+
You power an AI assistant called Le Chat.
|
3 |
+
Your knowledge base was last updated on 2023-10-01.
|
4 |
+
The current date is {today}.
|
5 |
+
|
6 |
+
When you're not sure about some information, you say that you don't have the information and don't make up anything.
|
7 |
+
If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?").
|
8 |
+
You are always very attentive to dates, in particular you try to resolve dates (e.g. "yesterday" is {yesterday}) and when asked about information at specific dates, you discard information that is at another date.
|
9 |
+
You follow these instructions in all languages, and always respond to the user in the language they use or request.
|
10 |
+
Next sections describe the capabilities that you have.
|
11 |
+
|
12 |
+
# WEB BROWSING INSTRUCTIONS
|
13 |
+
|
14 |
+
You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat.
|
15 |
+
|
16 |
+
# MULTI-MODAL INSTRUCTIONS
|
17 |
+
|
18 |
+
You do not have any multimodal capability, in particular you cannot read nor generate images, or transcribe audio files or videos.
|
config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"MistralForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_dropout": 0.0,
|
6 |
+
"bos_token_id": 1,
|
7 |
+
"eos_token_id": 2,
|
8 |
+
"head_dim": 128,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 12288,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 28672,
|
13 |
+
"max_position_embeddings": 131072,
|
14 |
+
"model_type": "mistral",
|
15 |
+
"num_attention_heads": 96,
|
16 |
+
"num_hidden_layers": 88,
|
17 |
+
"num_key_value_heads": 8,
|
18 |
+
"rms_norm_eps": 1e-05,
|
19 |
+
"rope_theta": 1000000.0,
|
20 |
+
"sliding_window": null,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"transformers_version": "4.46.2",
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 32768,
|
25 |
+
"quantization_config": {
|
26 |
+
"quant_method": "exl2",
|
27 |
+
"version": "0.2.3",
|
28 |
+
"bits": 2.5,
|
29 |
+
"head_bits": 6,
|
30 |
+
"calibration": {
|
31 |
+
"rows": 115,
|
32 |
+
"length": 2048,
|
33 |
+
"dataset": "(default)"
|
34 |
+
}
|
35 |
+
}
|
36 |
+
}
|
consolidated.safetensors.index.json
ADDED
@@ -0,0 +1,802 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 245220139008
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"layers.0.attention.wk.weight": "consolidated-00001-of-00051.safetensors",
|
7 |
+
"layers.0.attention.wo.weight": "consolidated-00001-of-00051.safetensors",
|
8 |
+
"layers.0.attention.wq.weight": "consolidated-00001-of-00051.safetensors",
|
9 |
+
"layers.0.attention.wv.weight": "consolidated-00001-of-00051.safetensors",
|
10 |
+
"layers.0.attention_norm.weight": "consolidated-00001-of-00051.safetensors",
|
11 |
+
"layers.0.feed_forward.w1.weight": "consolidated-00001-of-00051.safetensors",
|
12 |
+
"layers.0.feed_forward.w2.weight": "consolidated-00001-of-00051.safetensors",
|
13 |
+
"layers.0.feed_forward.w3.weight": "consolidated-00001-of-00051.safetensors",
|
14 |
+
"layers.0.ffn_norm.weight": "consolidated-00001-of-00051.safetensors",
|
15 |
+
"layers.1.attention.wk.weight": "consolidated-00001-of-00051.safetensors",
|
16 |
+
"layers.1.attention.wo.weight": "consolidated-00001-of-00051.safetensors",
|
17 |
+
"layers.1.attention.wq.weight": "consolidated-00001-of-00051.safetensors",
|
18 |
+
"layers.1.attention.wv.weight": "consolidated-00001-of-00051.safetensors",
|
19 |
+
"layers.1.attention_norm.weight": "consolidated-00001-of-00051.safetensors",
|
20 |
+
"layers.1.feed_forward.w1.weight": "consolidated-00001-of-00051.safetensors",
|
21 |
+
"layers.1.feed_forward.w2.weight": "consolidated-00001-of-00051.safetensors",
|
22 |
+
"layers.1.feed_forward.w3.weight": "consolidated-00002-of-00051.safetensors",
|
23 |
+
"layers.1.ffn_norm.weight": "consolidated-00002-of-00051.safetensors",
|
24 |
+
"layers.10.attention.wk.weight": "consolidated-00002-of-00051.safetensors",
|
25 |
+
"layers.10.attention.wo.weight": "consolidated-00002-of-00051.safetensors",
|
26 |
+
"layers.10.attention.wq.weight": "consolidated-00002-of-00051.safetensors",
|
27 |
+
"layers.10.attention.wv.weight": "consolidated-00002-of-00051.safetensors",
|
28 |
+
"layers.10.attention_norm.weight": "consolidated-00002-of-00051.safetensors",
|
29 |
+
"layers.10.feed_forward.w1.weight": "consolidated-00002-of-00051.safetensors",
|
30 |
+
"layers.10.feed_forward.w2.weight": "consolidated-00002-of-00051.safetensors",
|
31 |
+
"layers.10.feed_forward.w3.weight": "consolidated-00002-of-00051.safetensors",
|
32 |
+
"layers.10.ffn_norm.weight": "consolidated-00002-of-00051.safetensors",
|
33 |
+
"layers.11.attention.wk.weight": "consolidated-00002-of-00051.safetensors",
|
34 |
+
"layers.11.attention.wo.weight": "consolidated-00002-of-00051.safetensors",
|
35 |
+
"layers.11.attention.wq.weight": "consolidated-00002-of-00051.safetensors",
|
36 |
+
"layers.11.attention.wv.weight": "consolidated-00002-of-00051.safetensors",
|
37 |
+
"layers.11.attention_norm.weight": "consolidated-00002-of-00051.safetensors",
|
38 |
+
"layers.11.feed_forward.w1.weight": "consolidated-00002-of-00051.safetensors",
|
39 |
+
"layers.11.feed_forward.w2.weight": "consolidated-00003-of-00051.safetensors",
|
40 |
+
"layers.11.feed_forward.w3.weight": "consolidated-00003-of-00051.safetensors",
|
41 |
+
"layers.11.ffn_norm.weight": "consolidated-00003-of-00051.safetensors",
|
42 |
+
"layers.12.attention.wk.weight": "consolidated-00003-of-00051.safetensors",
|
43 |
+
"layers.12.attention.wo.weight": "consolidated-00003-of-00051.safetensors",
|
44 |
+
"layers.12.attention.wq.weight": "consolidated-00003-of-00051.safetensors",
|
45 |
+
"layers.12.attention.wv.weight": "consolidated-00003-of-00051.safetensors",
|
46 |
+
"layers.12.attention_norm.weight": "consolidated-00003-of-00051.safetensors",
|
47 |
+
"layers.12.feed_forward.w1.weight": "consolidated-00003-of-00051.safetensors",
|
48 |
+
"layers.12.feed_forward.w2.weight": "consolidated-00003-of-00051.safetensors",
|
49 |
+
"layers.12.feed_forward.w3.weight": "consolidated-00003-of-00051.safetensors",
|
50 |
+
"layers.12.ffn_norm.weight": "consolidated-00003-of-00051.safetensors",
|
51 |
+
"layers.13.attention.wk.weight": "consolidated-00003-of-00051.safetensors",
|
52 |
+
"layers.13.attention.wo.weight": "consolidated-00003-of-00051.safetensors",
|
53 |
+
"layers.13.attention.wq.weight": "consolidated-00003-of-00051.safetensors",
|
54 |
+
"layers.13.attention.wv.weight": "consolidated-00003-of-00051.safetensors",
|
55 |
+
"layers.13.attention_norm.weight": "consolidated-00003-of-00051.safetensors",
|
56 |
+
"layers.13.feed_forward.w1.weight": "consolidated-00004-of-00051.safetensors",
|
57 |
+
"layers.13.feed_forward.w2.weight": "consolidated-00004-of-00051.safetensors",
|
58 |
+
"layers.13.feed_forward.w3.weight": "consolidated-00004-of-00051.safetensors",
|
59 |
+
"layers.13.ffn_norm.weight": "consolidated-00004-of-00051.safetensors",
|
60 |
+
"layers.14.attention.wk.weight": "consolidated-00004-of-00051.safetensors",
|
61 |
+
"layers.14.attention.wo.weight": "consolidated-00004-of-00051.safetensors",
|
62 |
+
"layers.14.attention.wq.weight": "consolidated-00004-of-00051.safetensors",
|
63 |
+
"layers.14.attention.wv.weight": "consolidated-00004-of-00051.safetensors",
|
64 |
+
"layers.14.attention_norm.weight": "consolidated-00004-of-00051.safetensors",
|
65 |
+
"layers.14.feed_forward.w1.weight": "consolidated-00004-of-00051.safetensors",
|
66 |
+
"layers.14.feed_forward.w2.weight": "consolidated-00004-of-00051.safetensors",
|
67 |
+
"layers.14.feed_forward.w3.weight": "consolidated-00004-of-00051.safetensors",
|
68 |
+
"layers.14.ffn_norm.weight": "consolidated-00004-of-00051.safetensors",
|
69 |
+
"layers.15.attention.wk.weight": "consolidated-00004-of-00051.safetensors",
|
70 |
+
"layers.15.attention.wo.weight": "consolidated-00005-of-00051.safetensors",
|
71 |
+
"layers.15.attention.wq.weight": "consolidated-00005-of-00051.safetensors",
|
72 |
+
"layers.15.attention.wv.weight": "consolidated-00005-of-00051.safetensors",
|
73 |
+
"layers.15.attention_norm.weight": "consolidated-00005-of-00051.safetensors",
|
74 |
+
"layers.15.feed_forward.w1.weight": "consolidated-00005-of-00051.safetensors",
|
75 |
+
"layers.15.feed_forward.w2.weight": "consolidated-00005-of-00051.safetensors",
|
76 |
+
"layers.15.feed_forward.w3.weight": "consolidated-00005-of-00051.safetensors",
|
77 |
+
"layers.15.ffn_norm.weight": "consolidated-00005-of-00051.safetensors",
|
78 |
+
"layers.16.attention.wk.weight": "consolidated-00005-of-00051.safetensors",
|
79 |
+
"layers.16.attention.wo.weight": "consolidated-00005-of-00051.safetensors",
|
80 |
+
"layers.16.attention.wq.weight": "consolidated-00005-of-00051.safetensors",
|
81 |
+
"layers.16.attention.wv.weight": "consolidated-00005-of-00051.safetensors",
|
82 |
+
"layers.16.attention_norm.weight": "consolidated-00005-of-00051.safetensors",
|
83 |
+
"layers.16.feed_forward.w1.weight": "consolidated-00005-of-00051.safetensors",
|
84 |
+
"layers.16.feed_forward.w2.weight": "consolidated-00005-of-00051.safetensors",
|
85 |
+
"layers.16.feed_forward.w3.weight": "consolidated-00006-of-00051.safetensors",
|
86 |
+
"layers.16.ffn_norm.weight": "consolidated-00006-of-00051.safetensors",
|
87 |
+
"layers.17.attention.wk.weight": "consolidated-00006-of-00051.safetensors",
|
88 |
+
"layers.17.attention.wo.weight": "consolidated-00006-of-00051.safetensors",
|
89 |
+
"layers.17.attention.wq.weight": "consolidated-00006-of-00051.safetensors",
|
90 |
+
"layers.17.attention.wv.weight": "consolidated-00006-of-00051.safetensors",
|
91 |
+
"layers.17.attention_norm.weight": "consolidated-00006-of-00051.safetensors",
|
92 |
+
"layers.17.feed_forward.w1.weight": "consolidated-00006-of-00051.safetensors",
|
93 |
+
"layers.17.feed_forward.w2.weight": "consolidated-00006-of-00051.safetensors",
|
94 |
+
"layers.17.feed_forward.w3.weight": "consolidated-00006-of-00051.safetensors",
|
95 |
+
"layers.17.ffn_norm.weight": "consolidated-00006-of-00051.safetensors",
|
96 |
+
"layers.18.attention.wk.weight": "consolidated-00006-of-00051.safetensors",
|
97 |
+
"layers.18.attention.wo.weight": "consolidated-00006-of-00051.safetensors",
|
98 |
+
"layers.18.attention.wq.weight": "consolidated-00006-of-00051.safetensors",
|
99 |
+
"layers.18.attention.wv.weight": "consolidated-00006-of-00051.safetensors",
|
100 |
+
"layers.18.attention_norm.weight": "consolidated-00006-of-00051.safetensors",
|
101 |
+
"layers.18.feed_forward.w1.weight": "consolidated-00006-of-00051.safetensors",
|
102 |
+
"layers.18.feed_forward.w2.weight": "consolidated-00007-of-00051.safetensors",
|
103 |
+
"layers.18.feed_forward.w3.weight": "consolidated-00007-of-00051.safetensors",
|
104 |
+
"layers.18.ffn_norm.weight": "consolidated-00007-of-00051.safetensors",
|
105 |
+
"layers.19.attention.wk.weight": "consolidated-00007-of-00051.safetensors",
|
106 |
+
"layers.19.attention.wo.weight": "consolidated-00007-of-00051.safetensors",
|
107 |
+
"layers.19.attention.wq.weight": "consolidated-00007-of-00051.safetensors",
|
108 |
+
"layers.19.attention.wv.weight": "consolidated-00007-of-00051.safetensors",
|
109 |
+
"layers.19.attention_norm.weight": "consolidated-00007-of-00051.safetensors",
|
110 |
+
"layers.19.feed_forward.w1.weight": "consolidated-00007-of-00051.safetensors",
|
111 |
+
"layers.19.feed_forward.w2.weight": "consolidated-00007-of-00051.safetensors",
|
112 |
+
"layers.19.feed_forward.w3.weight": "consolidated-00007-of-00051.safetensors",
|
113 |
+
"layers.19.ffn_norm.weight": "consolidated-00007-of-00051.safetensors",
|
114 |
+
"layers.2.attention.wk.weight": "consolidated-00007-of-00051.safetensors",
|
115 |
+
"layers.2.attention.wo.weight": "consolidated-00007-of-00051.safetensors",
|
116 |
+
"layers.2.attention.wq.weight": "consolidated-00007-of-00051.safetensors",
|
117 |
+
"layers.2.attention.wv.weight": "consolidated-00007-of-00051.safetensors",
|
118 |
+
"layers.2.attention_norm.weight": "consolidated-00007-of-00051.safetensors",
|
119 |
+
"layers.2.feed_forward.w1.weight": "consolidated-00008-of-00051.safetensors",
|
120 |
+
"layers.2.feed_forward.w2.weight": "consolidated-00008-of-00051.safetensors",
|
121 |
+
"layers.2.feed_forward.w3.weight": "consolidated-00008-of-00051.safetensors",
|
122 |
+
"layers.2.ffn_norm.weight": "consolidated-00008-of-00051.safetensors",
|
123 |
+
"layers.20.attention.wk.weight": "consolidated-00008-of-00051.safetensors",
|
124 |
+
"layers.20.attention.wo.weight": "consolidated-00008-of-00051.safetensors",
|
125 |
+
"layers.20.attention.wq.weight": "consolidated-00008-of-00051.safetensors",
|
126 |
+
"layers.20.attention.wv.weight": "consolidated-00008-of-00051.safetensors",
|
127 |
+
"layers.20.attention_norm.weight": "consolidated-00008-of-00051.safetensors",
|
128 |
+
"layers.20.feed_forward.w1.weight": "consolidated-00008-of-00051.safetensors",
|
129 |
+
"layers.20.feed_forward.w2.weight": "consolidated-00008-of-00051.safetensors",
|
130 |
+
"layers.20.feed_forward.w3.weight": "consolidated-00008-of-00051.safetensors",
|
131 |
+
"layers.20.ffn_norm.weight": "consolidated-00008-of-00051.safetensors",
|
132 |
+
"layers.21.attention.wk.weight": "consolidated-00008-of-00051.safetensors",
|
133 |
+
"layers.21.attention.wo.weight": "consolidated-00009-of-00051.safetensors",
|
134 |
+
"layers.21.attention.wq.weight": "consolidated-00009-of-00051.safetensors",
|
135 |
+
"layers.21.attention.wv.weight": "consolidated-00009-of-00051.safetensors",
|
136 |
+
"layers.21.attention_norm.weight": "consolidated-00009-of-00051.safetensors",
|
137 |
+
"layers.21.feed_forward.w1.weight": "consolidated-00009-of-00051.safetensors",
|
138 |
+
"layers.21.feed_forward.w2.weight": "consolidated-00009-of-00051.safetensors",
|
139 |
+
"layers.21.feed_forward.w3.weight": "consolidated-00009-of-00051.safetensors",
|
140 |
+
"layers.21.ffn_norm.weight": "consolidated-00009-of-00051.safetensors",
|
141 |
+
"layers.22.attention.wk.weight": "consolidated-00009-of-00051.safetensors",
|
142 |
+
"layers.22.attention.wo.weight": "consolidated-00009-of-00051.safetensors",
|
143 |
+
"layers.22.attention.wq.weight": "consolidated-00009-of-00051.safetensors",
|
144 |
+
"layers.22.attention.wv.weight": "consolidated-00009-of-00051.safetensors",
|
145 |
+
"layers.22.attention_norm.weight": "consolidated-00009-of-00051.safetensors",
|
146 |
+
"layers.22.feed_forward.w1.weight": "consolidated-00009-of-00051.safetensors",
|
147 |
+
"layers.22.feed_forward.w2.weight": "consolidated-00009-of-00051.safetensors",
|
148 |
+
"layers.22.feed_forward.w3.weight": "consolidated-00010-of-00051.safetensors",
|
149 |
+
"layers.22.ffn_norm.weight": "consolidated-00010-of-00051.safetensors",
|
150 |
+
"layers.23.attention.wk.weight": "consolidated-00010-of-00051.safetensors",
|
151 |
+
"layers.23.attention.wo.weight": "consolidated-00010-of-00051.safetensors",
|
152 |
+
"layers.23.attention.wq.weight": "consolidated-00010-of-00051.safetensors",
|
153 |
+
"layers.23.attention.wv.weight": "consolidated-00010-of-00051.safetensors",
|
154 |
+
"layers.23.attention_norm.weight": "consolidated-00010-of-00051.safetensors",
|
155 |
+
"layers.23.feed_forward.w1.weight": "consolidated-00010-of-00051.safetensors",
|
156 |
+
"layers.23.feed_forward.w2.weight": "consolidated-00010-of-00051.safetensors",
|
157 |
+
"layers.23.feed_forward.w3.weight": "consolidated-00010-of-00051.safetensors",
|
158 |
+
"layers.23.ffn_norm.weight": "consolidated-00010-of-00051.safetensors",
|
159 |
+
"layers.24.attention.wk.weight": "consolidated-00010-of-00051.safetensors",
|
160 |
+
"layers.24.attention.wo.weight": "consolidated-00010-of-00051.safetensors",
|
161 |
+
"layers.24.attention.wq.weight": "consolidated-00010-of-00051.safetensors",
|
162 |
+
"layers.24.attention.wv.weight": "consolidated-00010-of-00051.safetensors",
|
163 |
+
"layers.24.attention_norm.weight": "consolidated-00010-of-00051.safetensors",
|
164 |
+
"layers.24.feed_forward.w1.weight": "consolidated-00010-of-00051.safetensors",
|
165 |
+
"layers.24.feed_forward.w2.weight": "consolidated-00011-of-00051.safetensors",
|
166 |
+
"layers.24.feed_forward.w3.weight": "consolidated-00011-of-00051.safetensors",
|
167 |
+
"layers.24.ffn_norm.weight": "consolidated-00011-of-00051.safetensors",
|
168 |
+
"layers.25.attention.wk.weight": "consolidated-00011-of-00051.safetensors",
|
169 |
+
"layers.25.attention.wo.weight": "consolidated-00011-of-00051.safetensors",
|
170 |
+
"layers.25.attention.wq.weight": "consolidated-00011-of-00051.safetensors",
|
171 |
+
"layers.25.attention.wv.weight": "consolidated-00011-of-00051.safetensors",
|
172 |
+
"layers.25.attention_norm.weight": "consolidated-00011-of-00051.safetensors",
|
173 |
+
"layers.25.feed_forward.w1.weight": "consolidated-00011-of-00051.safetensors",
|
174 |
+
"layers.25.feed_forward.w2.weight": "consolidated-00011-of-00051.safetensors",
|
175 |
+
"layers.25.feed_forward.w3.weight": "consolidated-00011-of-00051.safetensors",
|
176 |
+
"layers.25.ffn_norm.weight": "consolidated-00011-of-00051.safetensors",
|
177 |
+
"layers.26.attention.wk.weight": "consolidated-00011-of-00051.safetensors",
|
178 |
+
"layers.26.attention.wo.weight": "consolidated-00011-of-00051.safetensors",
|
179 |
+
"layers.26.attention.wq.weight": "consolidated-00011-of-00051.safetensors",
|
180 |
+
"layers.26.attention.wv.weight": "consolidated-00011-of-00051.safetensors",
|
181 |
+
"layers.26.attention_norm.weight": "consolidated-00011-of-00051.safetensors",
|
182 |
+
"layers.26.feed_forward.w1.weight": "consolidated-00012-of-00051.safetensors",
|
183 |
+
"layers.26.feed_forward.w2.weight": "consolidated-00012-of-00051.safetensors",
|
184 |
+
"layers.26.feed_forward.w3.weight": "consolidated-00012-of-00051.safetensors",
|
185 |
+
"layers.26.ffn_norm.weight": "consolidated-00012-of-00051.safetensors",
|
186 |
+
"layers.27.attention.wk.weight": "consolidated-00012-of-00051.safetensors",
|
187 |
+
"layers.27.attention.wo.weight": "consolidated-00012-of-00051.safetensors",
|
188 |
+
"layers.27.attention.wq.weight": "consolidated-00012-of-00051.safetensors",
|
189 |
+
"layers.27.attention.wv.weight": "consolidated-00012-of-00051.safetensors",
|
190 |
+
"layers.27.attention_norm.weight": "consolidated-00012-of-00051.safetensors",
|
191 |
+
"layers.27.feed_forward.w1.weight": "consolidated-00012-of-00051.safetensors",
|
192 |
+
"layers.27.feed_forward.w2.weight": "consolidated-00012-of-00051.safetensors",
|
193 |
+
"layers.27.feed_forward.w3.weight": "consolidated-00012-of-00051.safetensors",
|
194 |
+
"layers.27.ffn_norm.weight": "consolidated-00012-of-00051.safetensors",
|
195 |
+
"layers.28.attention.wk.weight": "consolidated-00012-of-00051.safetensors",
|
196 |
+
"layers.28.attention.wo.weight": "consolidated-00013-of-00051.safetensors",
|
197 |
+
"layers.28.attention.wq.weight": "consolidated-00013-of-00051.safetensors",
|
198 |
+
"layers.28.attention.wv.weight": "consolidated-00013-of-00051.safetensors",
|
199 |
+
"layers.28.attention_norm.weight": "consolidated-00013-of-00051.safetensors",
|
200 |
+
"layers.28.feed_forward.w1.weight": "consolidated-00013-of-00051.safetensors",
|
201 |
+
"layers.28.feed_forward.w2.weight": "consolidated-00013-of-00051.safetensors",
|
202 |
+
"layers.28.feed_forward.w3.weight": "consolidated-00013-of-00051.safetensors",
|
203 |
+
"layers.28.ffn_norm.weight": "consolidated-00013-of-00051.safetensors",
|
204 |
+
"layers.29.attention.wk.weight": "consolidated-00013-of-00051.safetensors",
|
205 |
+
"layers.29.attention.wo.weight": "consolidated-00013-of-00051.safetensors",
|
206 |
+
"layers.29.attention.wq.weight": "consolidated-00013-of-00051.safetensors",
|
207 |
+
"layers.29.attention.wv.weight": "consolidated-00013-of-00051.safetensors",
|
208 |
+
"layers.29.attention_norm.weight": "consolidated-00013-of-00051.safetensors",
|
209 |
+
"layers.29.feed_forward.w1.weight": "consolidated-00013-of-00051.safetensors",
|
210 |
+
"layers.29.feed_forward.w2.weight": "consolidated-00013-of-00051.safetensors",
|
211 |
+
"layers.29.feed_forward.w3.weight": "consolidated-00014-of-00051.safetensors",
|
212 |
+
"layers.29.ffn_norm.weight": "consolidated-00014-of-00051.safetensors",
|
213 |
+
"layers.3.attention.wk.weight": "consolidated-00014-of-00051.safetensors",
|
214 |
+
"layers.3.attention.wo.weight": "consolidated-00014-of-00051.safetensors",
|
215 |
+
"layers.3.attention.wq.weight": "consolidated-00014-of-00051.safetensors",
|
216 |
+
"layers.3.attention.wv.weight": "consolidated-00014-of-00051.safetensors",
|
217 |
+
"layers.3.attention_norm.weight": "consolidated-00014-of-00051.safetensors",
|
218 |
+
"layers.3.feed_forward.w1.weight": "consolidated-00014-of-00051.safetensors",
|
219 |
+
"layers.3.feed_forward.w2.weight": "consolidated-00014-of-00051.safetensors",
|
220 |
+
"layers.3.feed_forward.w3.weight": "consolidated-00014-of-00051.safetensors",
|
221 |
+
"layers.3.ffn_norm.weight": "consolidated-00014-of-00051.safetensors",
|
222 |
+
"layers.30.attention.wk.weight": "consolidated-00014-of-00051.safetensors",
|
223 |
+
"layers.30.attention.wo.weight": "consolidated-00014-of-00051.safetensors",
|
224 |
+
"layers.30.attention.wq.weight": "consolidated-00014-of-00051.safetensors",
|
225 |
+
"layers.30.attention.wv.weight": "consolidated-00014-of-00051.safetensors",
|
226 |
+
"layers.30.attention_norm.weight": "consolidated-00014-of-00051.safetensors",
|
227 |
+
"layers.30.feed_forward.w1.weight": "consolidated-00014-of-00051.safetensors",
|
228 |
+
"layers.30.feed_forward.w2.weight": "consolidated-00015-of-00051.safetensors",
|
229 |
+
"layers.30.feed_forward.w3.weight": "consolidated-00015-of-00051.safetensors",
|
230 |
+
"layers.30.ffn_norm.weight": "consolidated-00015-of-00051.safetensors",
|
231 |
+
"layers.31.attention.wk.weight": "consolidated-00015-of-00051.safetensors",
|
232 |
+
"layers.31.attention.wo.weight": "consolidated-00015-of-00051.safetensors",
|
233 |
+
"layers.31.attention.wq.weight": "consolidated-00015-of-00051.safetensors",
|
234 |
+
"layers.31.attention.wv.weight": "consolidated-00015-of-00051.safetensors",
|
235 |
+
"layers.31.attention_norm.weight": "consolidated-00015-of-00051.safetensors",
|
236 |
+
"layers.31.feed_forward.w1.weight": "consolidated-00015-of-00051.safetensors",
|
237 |
+
"layers.31.feed_forward.w2.weight": "consolidated-00015-of-00051.safetensors",
|
238 |
+
"layers.31.feed_forward.w3.weight": "consolidated-00015-of-00051.safetensors",
|
239 |
+
"layers.31.ffn_norm.weight": "consolidated-00015-of-00051.safetensors",
|
240 |
+
"layers.32.attention.wk.weight": "consolidated-00015-of-00051.safetensors",
|
241 |
+
"layers.32.attention.wo.weight": "consolidated-00015-of-00051.safetensors",
|
242 |
+
"layers.32.attention.wq.weight": "consolidated-00015-of-00051.safetensors",
|
243 |
+
"layers.32.attention.wv.weight": "consolidated-00015-of-00051.safetensors",
|
244 |
+
"layers.32.attention_norm.weight": "consolidated-00015-of-00051.safetensors",
|
245 |
+
"layers.32.feed_forward.w1.weight": "consolidated-00016-of-00051.safetensors",
|
246 |
+
"layers.32.feed_forward.w2.weight": "consolidated-00016-of-00051.safetensors",
|
247 |
+
"layers.32.feed_forward.w3.weight": "consolidated-00016-of-00051.safetensors",
|
248 |
+
"layers.32.ffn_norm.weight": "consolidated-00016-of-00051.safetensors",
|
249 |
+
"layers.33.attention.wk.weight": "consolidated-00016-of-00051.safetensors",
|
250 |
+
"layers.33.attention.wo.weight": "consolidated-00016-of-00051.safetensors",
|
251 |
+
"layers.33.attention.wq.weight": "consolidated-00016-of-00051.safetensors",
|
252 |
+
"layers.33.attention.wv.weight": "consolidated-00016-of-00051.safetensors",
|
253 |
+
"layers.33.attention_norm.weight": "consolidated-00016-of-00051.safetensors",
|
254 |
+
"layers.33.feed_forward.w1.weight": "consolidated-00016-of-00051.safetensors",
|
255 |
+
"layers.33.feed_forward.w2.weight": "consolidated-00016-of-00051.safetensors",
|
256 |
+
"layers.33.feed_forward.w3.weight": "consolidated-00016-of-00051.safetensors",
|
257 |
+
"layers.33.ffn_norm.weight": "consolidated-00016-of-00051.safetensors",
|
258 |
+
"layers.34.attention.wk.weight": "consolidated-00016-of-00051.safetensors",
|
259 |
+
"layers.34.attention.wo.weight": "consolidated-00017-of-00051.safetensors",
|
260 |
+
"layers.34.attention.wq.weight": "consolidated-00017-of-00051.safetensors",
|
261 |
+
"layers.34.attention.wv.weight": "consolidated-00017-of-00051.safetensors",
|
262 |
+
"layers.34.attention_norm.weight": "consolidated-00017-of-00051.safetensors",
|
263 |
+
"layers.34.feed_forward.w1.weight": "consolidated-00017-of-00051.safetensors",
|
264 |
+
"layers.34.feed_forward.w2.weight": "consolidated-00017-of-00051.safetensors",
|
265 |
+
"layers.34.feed_forward.w3.weight": "consolidated-00017-of-00051.safetensors",
|
266 |
+
"layers.34.ffn_norm.weight": "consolidated-00017-of-00051.safetensors",
|
267 |
+
"layers.35.attention.wk.weight": "consolidated-00017-of-00051.safetensors",
|
268 |
+
"layers.35.attention.wo.weight": "consolidated-00017-of-00051.safetensors",
|
269 |
+
"layers.35.attention.wq.weight": "consolidated-00017-of-00051.safetensors",
|
270 |
+
"layers.35.attention.wv.weight": "consolidated-00017-of-00051.safetensors",
|
271 |
+
"layers.35.attention_norm.weight": "consolidated-00017-of-00051.safetensors",
|
272 |
+
"layers.35.feed_forward.w1.weight": "consolidated-00017-of-00051.safetensors",
|
273 |
+
"layers.35.feed_forward.w2.weight": "consolidated-00017-of-00051.safetensors",
|
274 |
+
"layers.35.feed_forward.w3.weight": "consolidated-00018-of-00051.safetensors",
|
275 |
+
"layers.35.ffn_norm.weight": "consolidated-00018-of-00051.safetensors",
|
276 |
+
"layers.36.attention.wk.weight": "consolidated-00018-of-00051.safetensors",
|
277 |
+
"layers.36.attention.wo.weight": "consolidated-00018-of-00051.safetensors",
|
278 |
+
"layers.36.attention.wq.weight": "consolidated-00018-of-00051.safetensors",
|
279 |
+
"layers.36.attention.wv.weight": "consolidated-00018-of-00051.safetensors",
|
280 |
+
"layers.36.attention_norm.weight": "consolidated-00018-of-00051.safetensors",
|
281 |
+
"layers.36.feed_forward.w1.weight": "consolidated-00018-of-00051.safetensors",
|
282 |
+
"layers.36.feed_forward.w2.weight": "consolidated-00018-of-00051.safetensors",
|
283 |
+
"layers.36.feed_forward.w3.weight": "consolidated-00018-of-00051.safetensors",
|
284 |
+
"layers.36.ffn_norm.weight": "consolidated-00018-of-00051.safetensors",
|
285 |
+
"layers.37.attention.wk.weight": "consolidated-00018-of-00051.safetensors",
|
286 |
+
"layers.37.attention.wo.weight": "consolidated-00018-of-00051.safetensors",
|
287 |
+
"layers.37.attention.wq.weight": "consolidated-00018-of-00051.safetensors",
|
288 |
+
"layers.37.attention.wv.weight": "consolidated-00018-of-00051.safetensors",
|
289 |
+
"layers.37.attention_norm.weight": "consolidated-00018-of-00051.safetensors",
|
290 |
+
"layers.37.feed_forward.w1.weight": "consolidated-00018-of-00051.safetensors",
|
291 |
+
"layers.37.feed_forward.w2.weight": "consolidated-00019-of-00051.safetensors",
|
292 |
+
"layers.37.feed_forward.w3.weight": "consolidated-00019-of-00051.safetensors",
|
293 |
+
"layers.37.ffn_norm.weight": "consolidated-00019-of-00051.safetensors",
|
294 |
+
"layers.38.attention.wk.weight": "consolidated-00019-of-00051.safetensors",
|
295 |
+
"layers.38.attention.wo.weight": "consolidated-00019-of-00051.safetensors",
|
296 |
+
"layers.38.attention.wq.weight": "consolidated-00019-of-00051.safetensors",
|
297 |
+
"layers.38.attention.wv.weight": "consolidated-00019-of-00051.safetensors",
|
298 |
+
"layers.38.attention_norm.weight": "consolidated-00019-of-00051.safetensors",
|
299 |
+
"layers.38.feed_forward.w1.weight": "consolidated-00019-of-00051.safetensors",
|
300 |
+
"layers.38.feed_forward.w2.weight": "consolidated-00019-of-00051.safetensors",
|
301 |
+
"layers.38.feed_forward.w3.weight": "consolidated-00019-of-00051.safetensors",
|
302 |
+
"layers.38.ffn_norm.weight": "consolidated-00019-of-00051.safetensors",
|
303 |
+
"layers.39.attention.wk.weight": "consolidated-00019-of-00051.safetensors",
|
304 |
+
"layers.39.attention.wo.weight": "consolidated-00019-of-00051.safetensors",
|
305 |
+
"layers.39.attention.wq.weight": "consolidated-00019-of-00051.safetensors",
|
306 |
+
"layers.39.attention.wv.weight": "consolidated-00019-of-00051.safetensors",
|
307 |
+
"layers.39.attention_norm.weight": "consolidated-00019-of-00051.safetensors",
|
308 |
+
"layers.39.feed_forward.w1.weight": "consolidated-00020-of-00051.safetensors",
|
309 |
+
"layers.39.feed_forward.w2.weight": "consolidated-00020-of-00051.safetensors",
|
310 |
+
"layers.39.feed_forward.w3.weight": "consolidated-00020-of-00051.safetensors",
|
311 |
+
"layers.39.ffn_norm.weight": "consolidated-00020-of-00051.safetensors",
|
312 |
+
"layers.4.attention.wk.weight": "consolidated-00020-of-00051.safetensors",
|
313 |
+
"layers.4.attention.wo.weight": "consolidated-00020-of-00051.safetensors",
|
314 |
+
"layers.4.attention.wq.weight": "consolidated-00020-of-00051.safetensors",
|
315 |
+
"layers.4.attention.wv.weight": "consolidated-00020-of-00051.safetensors",
|
316 |
+
"layers.4.attention_norm.weight": "consolidated-00020-of-00051.safetensors",
|
317 |
+
"layers.4.feed_forward.w1.weight": "consolidated-00020-of-00051.safetensors",
|
318 |
+
"layers.4.feed_forward.w2.weight": "consolidated-00020-of-00051.safetensors",
|
319 |
+
"layers.4.feed_forward.w3.weight": "consolidated-00020-of-00051.safetensors",
|
320 |
+
"layers.4.ffn_norm.weight": "consolidated-00020-of-00051.safetensors",
|
321 |
+
"layers.40.attention.wk.weight": "consolidated-00020-of-00051.safetensors",
|
322 |
+
"layers.40.attention.wo.weight": "consolidated-00021-of-00051.safetensors",
|
323 |
+
"layers.40.attention.wq.weight": "consolidated-00021-of-00051.safetensors",
|
324 |
+
"layers.40.attention.wv.weight": "consolidated-00021-of-00051.safetensors",
|
325 |
+
"layers.40.attention_norm.weight": "consolidated-00021-of-00051.safetensors",
|
326 |
+
"layers.40.feed_forward.w1.weight": "consolidated-00021-of-00051.safetensors",
|
327 |
+
"layers.40.feed_forward.w2.weight": "consolidated-00021-of-00051.safetensors",
|
328 |
+
"layers.40.feed_forward.w3.weight": "consolidated-00021-of-00051.safetensors",
|
329 |
+
"layers.40.ffn_norm.weight": "consolidated-00021-of-00051.safetensors",
|
330 |
+
"layers.41.attention.wk.weight": "consolidated-00021-of-00051.safetensors",
|
331 |
+
"layers.41.attention.wo.weight": "consolidated-00021-of-00051.safetensors",
|
332 |
+
"layers.41.attention.wq.weight": "consolidated-00021-of-00051.safetensors",
|
333 |
+
"layers.41.attention.wv.weight": "consolidated-00021-of-00051.safetensors",
|
334 |
+
"layers.41.attention_norm.weight": "consolidated-00021-of-00051.safetensors",
|
335 |
+
"layers.41.feed_forward.w1.weight": "consolidated-00021-of-00051.safetensors",
|
336 |
+
"layers.41.feed_forward.w2.weight": "consolidated-00021-of-00051.safetensors",
|
337 |
+
"layers.41.feed_forward.w3.weight": "consolidated-00022-of-00051.safetensors",
|
338 |
+
"layers.41.ffn_norm.weight": "consolidated-00022-of-00051.safetensors",
|
339 |
+
"layers.42.attention.wk.weight": "consolidated-00022-of-00051.safetensors",
|
340 |
+
"layers.42.attention.wo.weight": "consolidated-00022-of-00051.safetensors",
|
341 |
+
"layers.42.attention.wq.weight": "consolidated-00022-of-00051.safetensors",
|
342 |
+
"layers.42.attention.wv.weight": "consolidated-00022-of-00051.safetensors",
|
343 |
+
"layers.42.attention_norm.weight": "consolidated-00022-of-00051.safetensors",
|
344 |
+
"layers.42.feed_forward.w1.weight": "consolidated-00022-of-00051.safetensors",
|
345 |
+
"layers.42.feed_forward.w2.weight": "consolidated-00022-of-00051.safetensors",
|
346 |
+
"layers.42.feed_forward.w3.weight": "consolidated-00022-of-00051.safetensors",
|
347 |
+
"layers.42.ffn_norm.weight": "consolidated-00022-of-00051.safetensors",
|
348 |
+
"layers.43.attention.wk.weight": "consolidated-00022-of-00051.safetensors",
|
349 |
+
"layers.43.attention.wo.weight": "consolidated-00022-of-00051.safetensors",
|
350 |
+
"layers.43.attention.wq.weight": "consolidated-00022-of-00051.safetensors",
|
351 |
+
"layers.43.attention.wv.weight": "consolidated-00022-of-00051.safetensors",
|
352 |
+
"layers.43.attention_norm.weight": "consolidated-00022-of-00051.safetensors",
|
353 |
+
"layers.43.feed_forward.w1.weight": "consolidated-00022-of-00051.safetensors",
|
354 |
+
"layers.43.feed_forward.w2.weight": "consolidated-00023-of-00051.safetensors",
|
355 |
+
"layers.43.feed_forward.w3.weight": "consolidated-00023-of-00051.safetensors",
|
356 |
+
"layers.43.ffn_norm.weight": "consolidated-00023-of-00051.safetensors",
|
357 |
+
"layers.44.attention.wk.weight": "consolidated-00023-of-00051.safetensors",
|
358 |
+
"layers.44.attention.wo.weight": "consolidated-00023-of-00051.safetensors",
|
359 |
+
"layers.44.attention.wq.weight": "consolidated-00023-of-00051.safetensors",
|
360 |
+
"layers.44.attention.wv.weight": "consolidated-00023-of-00051.safetensors",
|
361 |
+
"layers.44.attention_norm.weight": "consolidated-00023-of-00051.safetensors",
|
362 |
+
"layers.44.feed_forward.w1.weight": "consolidated-00023-of-00051.safetensors",
|
363 |
+
"layers.44.feed_forward.w2.weight": "consolidated-00023-of-00051.safetensors",
|
364 |
+
"layers.44.feed_forward.w3.weight": "consolidated-00023-of-00051.safetensors",
|
365 |
+
"layers.44.ffn_norm.weight": "consolidated-00023-of-00051.safetensors",
|
366 |
+
"layers.45.attention.wk.weight": "consolidated-00023-of-00051.safetensors",
|
367 |
+
"layers.45.attention.wo.weight": "consolidated-00023-of-00051.safetensors",
|
368 |
+
"layers.45.attention.wq.weight": "consolidated-00023-of-00051.safetensors",
|
369 |
+
"layers.45.attention.wv.weight": "consolidated-00023-of-00051.safetensors",
|
370 |
+
"layers.45.attention_norm.weight": "consolidated-00023-of-00051.safetensors",
|
371 |
+
"layers.45.feed_forward.w1.weight": "consolidated-00024-of-00051.safetensors",
|
372 |
+
"layers.45.feed_forward.w2.weight": "consolidated-00024-of-00051.safetensors",
|
373 |
+
"layers.45.feed_forward.w3.weight": "consolidated-00024-of-00051.safetensors",
|
374 |
+
"layers.45.ffn_norm.weight": "consolidated-00024-of-00051.safetensors",
|
375 |
+
"layers.46.attention.wk.weight": "consolidated-00024-of-00051.safetensors",
|
376 |
+
"layers.46.attention.wo.weight": "consolidated-00024-of-00051.safetensors",
|
377 |
+
"layers.46.attention.wq.weight": "consolidated-00024-of-00051.safetensors",
|
378 |
+
"layers.46.attention.wv.weight": "consolidated-00024-of-00051.safetensors",
|
379 |
+
"layers.46.attention_norm.weight": "consolidated-00024-of-00051.safetensors",
|
380 |
+
"layers.46.feed_forward.w1.weight": "consolidated-00024-of-00051.safetensors",
|
381 |
+
"layers.46.feed_forward.w2.weight": "consolidated-00024-of-00051.safetensors",
|
382 |
+
"layers.46.feed_forward.w3.weight": "consolidated-00024-of-00051.safetensors",
|
383 |
+
"layers.46.ffn_norm.weight": "consolidated-00024-of-00051.safetensors",
|
384 |
+
"layers.47.attention.wk.weight": "consolidated-00024-of-00051.safetensors",
|
385 |
+
"layers.47.attention.wo.weight": "consolidated-00025-of-00051.safetensors",
|
386 |
+
"layers.47.attention.wq.weight": "consolidated-00025-of-00051.safetensors",
|
387 |
+
"layers.47.attention.wv.weight": "consolidated-00025-of-00051.safetensors",
|
388 |
+
"layers.47.attention_norm.weight": "consolidated-00025-of-00051.safetensors",
|
389 |
+
"layers.47.feed_forward.w1.weight": "consolidated-00025-of-00051.safetensors",
|
390 |
+
"layers.47.feed_forward.w2.weight": "consolidated-00025-of-00051.safetensors",
|
391 |
+
"layers.47.feed_forward.w3.weight": "consolidated-00025-of-00051.safetensors",
|
392 |
+
"layers.47.ffn_norm.weight": "consolidated-00025-of-00051.safetensors",
|
393 |
+
"layers.48.attention.wk.weight": "consolidated-00025-of-00051.safetensors",
|
394 |
+
"layers.48.attention.wo.weight": "consolidated-00025-of-00051.safetensors",
|
395 |
+
"layers.48.attention.wq.weight": "consolidated-00025-of-00051.safetensors",
|
396 |
+
"layers.48.attention.wv.weight": "consolidated-00025-of-00051.safetensors",
|
397 |
+
"layers.48.attention_norm.weight": "consolidated-00025-of-00051.safetensors",
|
398 |
+
"layers.48.feed_forward.w1.weight": "consolidated-00025-of-00051.safetensors",
|
399 |
+
"layers.48.feed_forward.w2.weight": "consolidated-00025-of-00051.safetensors",
|
400 |
+
"layers.48.feed_forward.w3.weight": "consolidated-00026-of-00051.safetensors",
|
401 |
+
"layers.48.ffn_norm.weight": "consolidated-00026-of-00051.safetensors",
|
402 |
+
"layers.49.attention.wk.weight": "consolidated-00026-of-00051.safetensors",
|
403 |
+
"layers.49.attention.wo.weight": "consolidated-00026-of-00051.safetensors",
|
404 |
+
"layers.49.attention.wq.weight": "consolidated-00026-of-00051.safetensors",
|
405 |
+
"layers.49.attention.wv.weight": "consolidated-00026-of-00051.safetensors",
|
406 |
+
"layers.49.attention_norm.weight": "consolidated-00026-of-00051.safetensors",
|
407 |
+
"layers.49.feed_forward.w1.weight": "consolidated-00026-of-00051.safetensors",
|
408 |
+
"layers.49.feed_forward.w2.weight": "consolidated-00026-of-00051.safetensors",
|
409 |
+
"layers.49.feed_forward.w3.weight": "consolidated-00026-of-00051.safetensors",
|
410 |
+
"layers.49.ffn_norm.weight": "consolidated-00026-of-00051.safetensors",
|
411 |
+
"layers.5.attention.wk.weight": "consolidated-00026-of-00051.safetensors",
|
412 |
+
"layers.5.attention.wo.weight": "consolidated-00026-of-00051.safetensors",
|
413 |
+
"layers.5.attention.wq.weight": "consolidated-00026-of-00051.safetensors",
|
414 |
+
"layers.5.attention.wv.weight": "consolidated-00026-of-00051.safetensors",
|
415 |
+
"layers.5.attention_norm.weight": "consolidated-00026-of-00051.safetensors",
|
416 |
+
"layers.5.feed_forward.w1.weight": "consolidated-00026-of-00051.safetensors",
|
417 |
+
"layers.5.feed_forward.w2.weight": "consolidated-00027-of-00051.safetensors",
|
418 |
+
"layers.5.feed_forward.w3.weight": "consolidated-00027-of-00051.safetensors",
|
419 |
+
"layers.5.ffn_norm.weight": "consolidated-00027-of-00051.safetensors",
|
420 |
+
"layers.50.attention.wk.weight": "consolidated-00027-of-00051.safetensors",
|
421 |
+
"layers.50.attention.wo.weight": "consolidated-00027-of-00051.safetensors",
|
422 |
+
"layers.50.attention.wq.weight": "consolidated-00027-of-00051.safetensors",
|
423 |
+
"layers.50.attention.wv.weight": "consolidated-00027-of-00051.safetensors",
|
424 |
+
"layers.50.attention_norm.weight": "consolidated-00027-of-00051.safetensors",
|
425 |
+
"layers.50.feed_forward.w1.weight": "consolidated-00027-of-00051.safetensors",
|
426 |
+
"layers.50.feed_forward.w2.weight": "consolidated-00027-of-00051.safetensors",
|
427 |
+
"layers.50.feed_forward.w3.weight": "consolidated-00027-of-00051.safetensors",
|
428 |
+
"layers.50.ffn_norm.weight": "consolidated-00027-of-00051.safetensors",
|
429 |
+
"layers.51.attention.wk.weight": "consolidated-00027-of-00051.safetensors",
|
430 |
+
"layers.51.attention.wo.weight": "consolidated-00027-of-00051.safetensors",
|
431 |
+
"layers.51.attention.wq.weight": "consolidated-00027-of-00051.safetensors",
|
432 |
+
"layers.51.attention.wv.weight": "consolidated-00027-of-00051.safetensors",
|
433 |
+
"layers.51.attention_norm.weight": "consolidated-00027-of-00051.safetensors",
|
434 |
+
"layers.51.feed_forward.w1.weight": "consolidated-00028-of-00051.safetensors",
|
435 |
+
"layers.51.feed_forward.w2.weight": "consolidated-00028-of-00051.safetensors",
|
436 |
+
"layers.51.feed_forward.w3.weight": "consolidated-00028-of-00051.safetensors",
|
437 |
+
"layers.51.ffn_norm.weight": "consolidated-00028-of-00051.safetensors",
|
438 |
+
"layers.52.attention.wk.weight": "consolidated-00028-of-00051.safetensors",
|
439 |
+
"layers.52.attention.wo.weight": "consolidated-00028-of-00051.safetensors",
|
440 |
+
"layers.52.attention.wq.weight": "consolidated-00028-of-00051.safetensors",
|
441 |
+
"layers.52.attention.wv.weight": "consolidated-00028-of-00051.safetensors",
|
442 |
+
"layers.52.attention_norm.weight": "consolidated-00028-of-00051.safetensors",
|
443 |
+
"layers.52.feed_forward.w1.weight": "consolidated-00028-of-00051.safetensors",
|
444 |
+
"layers.52.feed_forward.w2.weight": "consolidated-00028-of-00051.safetensors",
|
445 |
+
"layers.52.feed_forward.w3.weight": "consolidated-00028-of-00051.safetensors",
|
446 |
+
"layers.52.ffn_norm.weight": "consolidated-00028-of-00051.safetensors",
|
447 |
+
"layers.53.attention.wk.weight": "consolidated-00028-of-00051.safetensors",
|
448 |
+
"layers.53.attention.wo.weight": "consolidated-00029-of-00051.safetensors",
|
449 |
+
"layers.53.attention.wq.weight": "consolidated-00029-of-00051.safetensors",
|
450 |
+
"layers.53.attention.wv.weight": "consolidated-00029-of-00051.safetensors",
|
451 |
+
"layers.53.attention_norm.weight": "consolidated-00029-of-00051.safetensors",
|
452 |
+
"layers.53.feed_forward.w1.weight": "consolidated-00029-of-00051.safetensors",
|
453 |
+
"layers.53.feed_forward.w2.weight": "consolidated-00029-of-00051.safetensors",
|
454 |
+
"layers.53.feed_forward.w3.weight": "consolidated-00029-of-00051.safetensors",
|
455 |
+
"layers.53.ffn_norm.weight": "consolidated-00029-of-00051.safetensors",
|
456 |
+
"layers.54.attention.wk.weight": "consolidated-00029-of-00051.safetensors",
|
457 |
+
"layers.54.attention.wo.weight": "consolidated-00029-of-00051.safetensors",
|
458 |
+
"layers.54.attention.wq.weight": "consolidated-00029-of-00051.safetensors",
|
459 |
+
"layers.54.attention.wv.weight": "consolidated-00029-of-00051.safetensors",
|
460 |
+
"layers.54.attention_norm.weight": "consolidated-00029-of-00051.safetensors",
|
461 |
+
"layers.54.feed_forward.w1.weight": "consolidated-00029-of-00051.safetensors",
|
462 |
+
"layers.54.feed_forward.w2.weight": "consolidated-00029-of-00051.safetensors",
|
463 |
+
"layers.54.feed_forward.w3.weight": "consolidated-00030-of-00051.safetensors",
|
464 |
+
"layers.54.ffn_norm.weight": "consolidated-00030-of-00051.safetensors",
|
465 |
+
"layers.55.attention.wk.weight": "consolidated-00030-of-00051.safetensors",
|
466 |
+
"layers.55.attention.wo.weight": "consolidated-00030-of-00051.safetensors",
|
467 |
+
"layers.55.attention.wq.weight": "consolidated-00030-of-00051.safetensors",
|
468 |
+
"layers.55.attention.wv.weight": "consolidated-00030-of-00051.safetensors",
|
469 |
+
"layers.55.attention_norm.weight": "consolidated-00030-of-00051.safetensors",
|
470 |
+
"layers.55.feed_forward.w1.weight": "consolidated-00030-of-00051.safetensors",
|
471 |
+
"layers.55.feed_forward.w2.weight": "consolidated-00030-of-00051.safetensors",
|
472 |
+
"layers.55.feed_forward.w3.weight": "consolidated-00030-of-00051.safetensors",
|
473 |
+
"layers.55.ffn_norm.weight": "consolidated-00030-of-00051.safetensors",
|
474 |
+
"layers.56.attention.wk.weight": "consolidated-00030-of-00051.safetensors",
|
475 |
+
"layers.56.attention.wo.weight": "consolidated-00030-of-00051.safetensors",
|
476 |
+
"layers.56.attention.wq.weight": "consolidated-00030-of-00051.safetensors",
|
477 |
+
"layers.56.attention.wv.weight": "consolidated-00030-of-00051.safetensors",
|
478 |
+
"layers.56.attention_norm.weight": "consolidated-00030-of-00051.safetensors",
|
479 |
+
"layers.56.feed_forward.w1.weight": "consolidated-00030-of-00051.safetensors",
|
480 |
+
"layers.56.feed_forward.w2.weight": "consolidated-00031-of-00051.safetensors",
|
481 |
+
"layers.56.feed_forward.w3.weight": "consolidated-00031-of-00051.safetensors",
|
482 |
+
"layers.56.ffn_norm.weight": "consolidated-00031-of-00051.safetensors",
|
483 |
+
"layers.57.attention.wk.weight": "consolidated-00031-of-00051.safetensors",
|
484 |
+
"layers.57.attention.wo.weight": "consolidated-00031-of-00051.safetensors",
|
485 |
+
"layers.57.attention.wq.weight": "consolidated-00031-of-00051.safetensors",
|
486 |
+
"layers.57.attention.wv.weight": "consolidated-00031-of-00051.safetensors",
|
487 |
+
"layers.57.attention_norm.weight": "consolidated-00031-of-00051.safetensors",
|
488 |
+
"layers.57.feed_forward.w1.weight": "consolidated-00031-of-00051.safetensors",
|
489 |
+
"layers.57.feed_forward.w2.weight": "consolidated-00031-of-00051.safetensors",
|
490 |
+
"layers.57.feed_forward.w3.weight": "consolidated-00031-of-00051.safetensors",
|
491 |
+
"layers.57.ffn_norm.weight": "consolidated-00031-of-00051.safetensors",
|
492 |
+
"layers.58.attention.wk.weight": "consolidated-00031-of-00051.safetensors",
|
493 |
+
"layers.58.attention.wo.weight": "consolidated-00031-of-00051.safetensors",
|
494 |
+
"layers.58.attention.wq.weight": "consolidated-00031-of-00051.safetensors",
|
495 |
+
"layers.58.attention.wv.weight": "consolidated-00031-of-00051.safetensors",
|
496 |
+
"layers.58.attention_norm.weight": "consolidated-00031-of-00051.safetensors",
|
497 |
+
"layers.58.feed_forward.w1.weight": "consolidated-00032-of-00051.safetensors",
|
498 |
+
"layers.58.feed_forward.w2.weight": "consolidated-00032-of-00051.safetensors",
|
499 |
+
"layers.58.feed_forward.w3.weight": "consolidated-00032-of-00051.safetensors",
|
500 |
+
"layers.58.ffn_norm.weight": "consolidated-00032-of-00051.safetensors",
|
501 |
+
"layers.59.attention.wk.weight": "consolidated-00032-of-00051.safetensors",
|
502 |
+
"layers.59.attention.wo.weight": "consolidated-00032-of-00051.safetensors",
|
503 |
+
"layers.59.attention.wq.weight": "consolidated-00032-of-00051.safetensors",
|
504 |
+
"layers.59.attention.wv.weight": "consolidated-00032-of-00051.safetensors",
|
505 |
+
"layers.59.attention_norm.weight": "consolidated-00032-of-00051.safetensors",
|
506 |
+
"layers.59.feed_forward.w1.weight": "consolidated-00032-of-00051.safetensors",
|
507 |
+
"layers.59.feed_forward.w2.weight": "consolidated-00032-of-00051.safetensors",
|
508 |
+
"layers.59.feed_forward.w3.weight": "consolidated-00032-of-00051.safetensors",
|
509 |
+
"layers.59.ffn_norm.weight": "consolidated-00032-of-00051.safetensors",
|
510 |
+
"layers.6.attention.wk.weight": "consolidated-00032-of-00051.safetensors",
|
511 |
+
"layers.6.attention.wo.weight": "consolidated-00033-of-00051.safetensors",
|
512 |
+
"layers.6.attention.wq.weight": "consolidated-00033-of-00051.safetensors",
|
513 |
+
"layers.6.attention.wv.weight": "consolidated-00033-of-00051.safetensors",
|
514 |
+
"layers.6.attention_norm.weight": "consolidated-00033-of-00051.safetensors",
|
515 |
+
"layers.6.feed_forward.w1.weight": "consolidated-00033-of-00051.safetensors",
|
516 |
+
"layers.6.feed_forward.w2.weight": "consolidated-00033-of-00051.safetensors",
|
517 |
+
"layers.6.feed_forward.w3.weight": "consolidated-00033-of-00051.safetensors",
|
518 |
+
"layers.6.ffn_norm.weight": "consolidated-00033-of-00051.safetensors",
|
519 |
+
"layers.60.attention.wk.weight": "consolidated-00033-of-00051.safetensors",
|
520 |
+
"layers.60.attention.wo.weight": "consolidated-00033-of-00051.safetensors",
|
521 |
+
"layers.60.attention.wq.weight": "consolidated-00033-of-00051.safetensors",
|
522 |
+
"layers.60.attention.wv.weight": "consolidated-00033-of-00051.safetensors",
|
523 |
+
"layers.60.attention_norm.weight": "consolidated-00033-of-00051.safetensors",
|
524 |
+
"layers.60.feed_forward.w1.weight": "consolidated-00033-of-00051.safetensors",
|
525 |
+
"layers.60.feed_forward.w2.weight": "consolidated-00033-of-00051.safetensors",
|
526 |
+
"layers.60.feed_forward.w3.weight": "consolidated-00034-of-00051.safetensors",
|
527 |
+
"layers.60.ffn_norm.weight": "consolidated-00034-of-00051.safetensors",
|
528 |
+
"layers.61.attention.wk.weight": "consolidated-00034-of-00051.safetensors",
|
529 |
+
"layers.61.attention.wo.weight": "consolidated-00034-of-00051.safetensors",
|
530 |
+
"layers.61.attention.wq.weight": "consolidated-00034-of-00051.safetensors",
|
531 |
+
"layers.61.attention.wv.weight": "consolidated-00034-of-00051.safetensors",
|
532 |
+
"layers.61.attention_norm.weight": "consolidated-00034-of-00051.safetensors",
|
533 |
+
"layers.61.feed_forward.w1.weight": "consolidated-00034-of-00051.safetensors",
|
534 |
+
"layers.61.feed_forward.w2.weight": "consolidated-00034-of-00051.safetensors",
|
535 |
+
"layers.61.feed_forward.w3.weight": "consolidated-00034-of-00051.safetensors",
|
536 |
+
"layers.61.ffn_norm.weight": "consolidated-00034-of-00051.safetensors",
|
537 |
+
"layers.62.attention.wk.weight": "consolidated-00034-of-00051.safetensors",
|
538 |
+
"layers.62.attention.wo.weight": "consolidated-00034-of-00051.safetensors",
|
539 |
+
"layers.62.attention.wq.weight": "consolidated-00034-of-00051.safetensors",
|
540 |
+
"layers.62.attention.wv.weight": "consolidated-00034-of-00051.safetensors",
|
541 |
+
"layers.62.attention_norm.weight": "consolidated-00034-of-00051.safetensors",
|
542 |
+
"layers.62.feed_forward.w1.weight": "consolidated-00034-of-00051.safetensors",
|
543 |
+
"layers.62.feed_forward.w2.weight": "consolidated-00035-of-00051.safetensors",
|
544 |
+
"layers.62.feed_forward.w3.weight": "consolidated-00035-of-00051.safetensors",
|
545 |
+
"layers.62.ffn_norm.weight": "consolidated-00035-of-00051.safetensors",
|
546 |
+
"layers.63.attention.wk.weight": "consolidated-00035-of-00051.safetensors",
|
547 |
+
"layers.63.attention.wo.weight": "consolidated-00035-of-00051.safetensors",
|
548 |
+
"layers.63.attention.wq.weight": "consolidated-00035-of-00051.safetensors",
|
549 |
+
"layers.63.attention.wv.weight": "consolidated-00035-of-00051.safetensors",
|
550 |
+
"layers.63.attention_norm.weight": "consolidated-00035-of-00051.safetensors",
|
551 |
+
"layers.63.feed_forward.w1.weight": "consolidated-00035-of-00051.safetensors",
|
552 |
+
"layers.63.feed_forward.w2.weight": "consolidated-00035-of-00051.safetensors",
|
553 |
+
"layers.63.feed_forward.w3.weight": "consolidated-00035-of-00051.safetensors",
|
554 |
+
"layers.63.ffn_norm.weight": "consolidated-00035-of-00051.safetensors",
|
555 |
+
"layers.64.attention.wk.weight": "consolidated-00035-of-00051.safetensors",
|
556 |
+
"layers.64.attention.wo.weight": "consolidated-00035-of-00051.safetensors",
|
557 |
+
"layers.64.attention.wq.weight": "consolidated-00035-of-00051.safetensors",
|
558 |
+
"layers.64.attention.wv.weight": "consolidated-00035-of-00051.safetensors",
|
559 |
+
"layers.64.attention_norm.weight": "consolidated-00035-of-00051.safetensors",
|
560 |
+
"layers.64.feed_forward.w1.weight": "consolidated-00036-of-00051.safetensors",
|
561 |
+
"layers.64.feed_forward.w2.weight": "consolidated-00036-of-00051.safetensors",
|
562 |
+
"layers.64.feed_forward.w3.weight": "consolidated-00036-of-00051.safetensors",
|
563 |
+
"layers.64.ffn_norm.weight": "consolidated-00036-of-00051.safetensors",
|
564 |
+
"layers.65.attention.wk.weight": "consolidated-00036-of-00051.safetensors",
|
565 |
+
"layers.65.attention.wo.weight": "consolidated-00036-of-00051.safetensors",
|
566 |
+
"layers.65.attention.wq.weight": "consolidated-00036-of-00051.safetensors",
|
567 |
+
"layers.65.attention.wv.weight": "consolidated-00036-of-00051.safetensors",
|
568 |
+
"layers.65.attention_norm.weight": "consolidated-00036-of-00051.safetensors",
|
569 |
+
"layers.65.feed_forward.w1.weight": "consolidated-00036-of-00051.safetensors",
|
570 |
+
"layers.65.feed_forward.w2.weight": "consolidated-00036-of-00051.safetensors",
|
571 |
+
"layers.65.feed_forward.w3.weight": "consolidated-00036-of-00051.safetensors",
|
572 |
+
"layers.65.ffn_norm.weight": "consolidated-00036-of-00051.safetensors",
|
573 |
+
"layers.66.attention.wk.weight": "consolidated-00036-of-00051.safetensors",
|
574 |
+
"layers.66.attention.wo.weight": "consolidated-00037-of-00051.safetensors",
|
575 |
+
"layers.66.attention.wq.weight": "consolidated-00037-of-00051.safetensors",
|
576 |
+
"layers.66.attention.wv.weight": "consolidated-00037-of-00051.safetensors",
|
577 |
+
"layers.66.attention_norm.weight": "consolidated-00037-of-00051.safetensors",
|
578 |
+
"layers.66.feed_forward.w1.weight": "consolidated-00037-of-00051.safetensors",
|
579 |
+
"layers.66.feed_forward.w2.weight": "consolidated-00037-of-00051.safetensors",
|
580 |
+
"layers.66.feed_forward.w3.weight": "consolidated-00037-of-00051.safetensors",
|
581 |
+
"layers.66.ffn_norm.weight": "consolidated-00037-of-00051.safetensors",
|
582 |
+
"layers.67.attention.wk.weight": "consolidated-00037-of-00051.safetensors",
|
583 |
+
"layers.67.attention.wo.weight": "consolidated-00037-of-00051.safetensors",
|
584 |
+
"layers.67.attention.wq.weight": "consolidated-00037-of-00051.safetensors",
|
585 |
+
"layers.67.attention.wv.weight": "consolidated-00037-of-00051.safetensors",
|
586 |
+
"layers.67.attention_norm.weight": "consolidated-00037-of-00051.safetensors",
|
587 |
+
"layers.67.feed_forward.w1.weight": "consolidated-00037-of-00051.safetensors",
|
588 |
+
"layers.67.feed_forward.w2.weight": "consolidated-00037-of-00051.safetensors",
|
589 |
+
"layers.67.feed_forward.w3.weight": "consolidated-00038-of-00051.safetensors",
|
590 |
+
"layers.67.ffn_norm.weight": "consolidated-00038-of-00051.safetensors",
|
591 |
+
"layers.68.attention.wk.weight": "consolidated-00038-of-00051.safetensors",
|
592 |
+
"layers.68.attention.wo.weight": "consolidated-00038-of-00051.safetensors",
|
593 |
+
"layers.68.attention.wq.weight": "consolidated-00038-of-00051.safetensors",
|
594 |
+
"layers.68.attention.wv.weight": "consolidated-00038-of-00051.safetensors",
|
595 |
+
"layers.68.attention_norm.weight": "consolidated-00038-of-00051.safetensors",
|
596 |
+
"layers.68.feed_forward.w1.weight": "consolidated-00038-of-00051.safetensors",
|
597 |
+
"layers.68.feed_forward.w2.weight": "consolidated-00038-of-00051.safetensors",
|
598 |
+
"layers.68.feed_forward.w3.weight": "consolidated-00038-of-00051.safetensors",
|
599 |
+
"layers.68.ffn_norm.weight": "consolidated-00038-of-00051.safetensors",
|
600 |
+
"layers.69.attention.wk.weight": "consolidated-00038-of-00051.safetensors",
|
601 |
+
"layers.69.attention.wo.weight": "consolidated-00038-of-00051.safetensors",
|
602 |
+
"layers.69.attention.wq.weight": "consolidated-00038-of-00051.safetensors",
|
603 |
+
"layers.69.attention.wv.weight": "consolidated-00038-of-00051.safetensors",
|
604 |
+
"layers.69.attention_norm.weight": "consolidated-00038-of-00051.safetensors",
|
605 |
+
"layers.69.feed_forward.w1.weight": "consolidated-00038-of-00051.safetensors",
|
606 |
+
"layers.69.feed_forward.w2.weight": "consolidated-00039-of-00051.safetensors",
|
607 |
+
"layers.69.feed_forward.w3.weight": "consolidated-00039-of-00051.safetensors",
|
608 |
+
"layers.69.ffn_norm.weight": "consolidated-00039-of-00051.safetensors",
|
609 |
+
"layers.7.attention.wk.weight": "consolidated-00039-of-00051.safetensors",
|
610 |
+
"layers.7.attention.wo.weight": "consolidated-00039-of-00051.safetensors",
|
611 |
+
"layers.7.attention.wq.weight": "consolidated-00039-of-00051.safetensors",
|
612 |
+
"layers.7.attention.wv.weight": "consolidated-00039-of-00051.safetensors",
|
613 |
+
"layers.7.attention_norm.weight": "consolidated-00039-of-00051.safetensors",
|
614 |
+
"layers.7.feed_forward.w1.weight": "consolidated-00039-of-00051.safetensors",
|
615 |
+
"layers.7.feed_forward.w2.weight": "consolidated-00039-of-00051.safetensors",
|
616 |
+
"layers.7.feed_forward.w3.weight": "consolidated-00039-of-00051.safetensors",
|
617 |
+
"layers.7.ffn_norm.weight": "consolidated-00039-of-00051.safetensors",
|
618 |
+
"layers.70.attention.wk.weight": "consolidated-00039-of-00051.safetensors",
|
619 |
+
"layers.70.attention.wo.weight": "consolidated-00039-of-00051.safetensors",
|
620 |
+
"layers.70.attention.wq.weight": "consolidated-00039-of-00051.safetensors",
|
621 |
+
"layers.70.attention.wv.weight": "consolidated-00039-of-00051.safetensors",
|
622 |
+
"layers.70.attention_norm.weight": "consolidated-00039-of-00051.safetensors",
|
623 |
+
"layers.70.feed_forward.w1.weight": "consolidated-00040-of-00051.safetensors",
|
624 |
+
"layers.70.feed_forward.w2.weight": "consolidated-00040-of-00051.safetensors",
|
625 |
+
"layers.70.feed_forward.w3.weight": "consolidated-00040-of-00051.safetensors",
|
626 |
+
"layers.70.ffn_norm.weight": "consolidated-00040-of-00051.safetensors",
|
627 |
+
"layers.71.attention.wk.weight": "consolidated-00040-of-00051.safetensors",
|
628 |
+
"layers.71.attention.wo.weight": "consolidated-00040-of-00051.safetensors",
|
629 |
+
"layers.71.attention.wq.weight": "consolidated-00040-of-00051.safetensors",
|
630 |
+
"layers.71.attention.wv.weight": "consolidated-00040-of-00051.safetensors",
|
631 |
+
"layers.71.attention_norm.weight": "consolidated-00040-of-00051.safetensors",
|
632 |
+
"layers.71.feed_forward.w1.weight": "consolidated-00040-of-00051.safetensors",
|
633 |
+
"layers.71.feed_forward.w2.weight": "consolidated-00040-of-00051.safetensors",
|
634 |
+
"layers.71.feed_forward.w3.weight": "consolidated-00040-of-00051.safetensors",
|
635 |
+
"layers.71.ffn_norm.weight": "consolidated-00040-of-00051.safetensors",
|
636 |
+
"layers.72.attention.wk.weight": "consolidated-00040-of-00051.safetensors",
|
637 |
+
"layers.72.attention.wo.weight": "consolidated-00041-of-00051.safetensors",
|
638 |
+
"layers.72.attention.wq.weight": "consolidated-00041-of-00051.safetensors",
|
639 |
+
"layers.72.attention.wv.weight": "consolidated-00041-of-00051.safetensors",
|
640 |
+
"layers.72.attention_norm.weight": "consolidated-00041-of-00051.safetensors",
|
641 |
+
"layers.72.feed_forward.w1.weight": "consolidated-00041-of-00051.safetensors",
|
642 |
+
"layers.72.feed_forward.w2.weight": "consolidated-00041-of-00051.safetensors",
|
643 |
+
"layers.72.feed_forward.w3.weight": "consolidated-00041-of-00051.safetensors",
|
644 |
+
"layers.72.ffn_norm.weight": "consolidated-00041-of-00051.safetensors",
|
645 |
+
"layers.73.attention.wk.weight": "consolidated-00041-of-00051.safetensors",
|
646 |
+
"layers.73.attention.wo.weight": "consolidated-00041-of-00051.safetensors",
|
647 |
+
"layers.73.attention.wq.weight": "consolidated-00041-of-00051.safetensors",
|
648 |
+
"layers.73.attention.wv.weight": "consolidated-00041-of-00051.safetensors",
|
649 |
+
"layers.73.attention_norm.weight": "consolidated-00041-of-00051.safetensors",
|
650 |
+
"layers.73.feed_forward.w1.weight": "consolidated-00041-of-00051.safetensors",
|
651 |
+
"layers.73.feed_forward.w2.weight": "consolidated-00041-of-00051.safetensors",
|
652 |
+
"layers.73.feed_forward.w3.weight": "consolidated-00042-of-00051.safetensors",
|
653 |
+
"layers.73.ffn_norm.weight": "consolidated-00042-of-00051.safetensors",
|
654 |
+
"layers.74.attention.wk.weight": "consolidated-00042-of-00051.safetensors",
|
655 |
+
"layers.74.attention.wo.weight": "consolidated-00042-of-00051.safetensors",
|
656 |
+
"layers.74.attention.wq.weight": "consolidated-00042-of-00051.safetensors",
|
657 |
+
"layers.74.attention.wv.weight": "consolidated-00042-of-00051.safetensors",
|
658 |
+
"layers.74.attention_norm.weight": "consolidated-00042-of-00051.safetensors",
|
659 |
+
"layers.74.feed_forward.w1.weight": "consolidated-00042-of-00051.safetensors",
|
660 |
+
"layers.74.feed_forward.w2.weight": "consolidated-00042-of-00051.safetensors",
|
661 |
+
"layers.74.feed_forward.w3.weight": "consolidated-00042-of-00051.safetensors",
|
662 |
+
"layers.74.ffn_norm.weight": "consolidated-00042-of-00051.safetensors",
|
663 |
+
"layers.75.attention.wk.weight": "consolidated-00042-of-00051.safetensors",
|
664 |
+
"layers.75.attention.wo.weight": "consolidated-00042-of-00051.safetensors",
|
665 |
+
"layers.75.attention.wq.weight": "consolidated-00042-of-00051.safetensors",
|
666 |
+
"layers.75.attention.wv.weight": "consolidated-00042-of-00051.safetensors",
|
667 |
+
"layers.75.attention_norm.weight": "consolidated-00042-of-00051.safetensors",
|
668 |
+
"layers.75.feed_forward.w1.weight": "consolidated-00042-of-00051.safetensors",
|
669 |
+
"layers.75.feed_forward.w2.weight": "consolidated-00043-of-00051.safetensors",
|
670 |
+
"layers.75.feed_forward.w3.weight": "consolidated-00043-of-00051.safetensors",
|
671 |
+
"layers.75.ffn_norm.weight": "consolidated-00043-of-00051.safetensors",
|
672 |
+
"layers.76.attention.wk.weight": "consolidated-00043-of-00051.safetensors",
|
673 |
+
"layers.76.attention.wo.weight": "consolidated-00043-of-00051.safetensors",
|
674 |
+
"layers.76.attention.wq.weight": "consolidated-00043-of-00051.safetensors",
|
675 |
+
"layers.76.attention.wv.weight": "consolidated-00043-of-00051.safetensors",
|
676 |
+
"layers.76.attention_norm.weight": "consolidated-00043-of-00051.safetensors",
|
677 |
+
"layers.76.feed_forward.w1.weight": "consolidated-00043-of-00051.safetensors",
|
678 |
+
"layers.76.feed_forward.w2.weight": "consolidated-00043-of-00051.safetensors",
|
679 |
+
"layers.76.feed_forward.w3.weight": "consolidated-00043-of-00051.safetensors",
|
680 |
+
"layers.76.ffn_norm.weight": "consolidated-00043-of-00051.safetensors",
|
681 |
+
"layers.77.attention.wk.weight": "consolidated-00043-of-00051.safetensors",
|
682 |
+
"layers.77.attention.wo.weight": "consolidated-00043-of-00051.safetensors",
|
683 |
+
"layers.77.attention.wq.weight": "consolidated-00043-of-00051.safetensors",
|
684 |
+
"layers.77.attention.wv.weight": "consolidated-00043-of-00051.safetensors",
|
685 |
+
"layers.77.attention_norm.weight": "consolidated-00043-of-00051.safetensors",
|
686 |
+
"layers.77.feed_forward.w1.weight": "consolidated-00044-of-00051.safetensors",
|
687 |
+
"layers.77.feed_forward.w2.weight": "consolidated-00044-of-00051.safetensors",
|
688 |
+
"layers.77.feed_forward.w3.weight": "consolidated-00044-of-00051.safetensors",
|
689 |
+
"layers.77.ffn_norm.weight": "consolidated-00044-of-00051.safetensors",
|
690 |
+
"layers.78.attention.wk.weight": "consolidated-00044-of-00051.safetensors",
|
691 |
+
"layers.78.attention.wo.weight": "consolidated-00044-of-00051.safetensors",
|
692 |
+
"layers.78.attention.wq.weight": "consolidated-00044-of-00051.safetensors",
|
693 |
+
"layers.78.attention.wv.weight": "consolidated-00044-of-00051.safetensors",
|
694 |
+
"layers.78.attention_norm.weight": "consolidated-00044-of-00051.safetensors",
|
695 |
+
"layers.78.feed_forward.w1.weight": "consolidated-00044-of-00051.safetensors",
|
696 |
+
"layers.78.feed_forward.w2.weight": "consolidated-00044-of-00051.safetensors",
|
697 |
+
"layers.78.feed_forward.w3.weight": "consolidated-00044-of-00051.safetensors",
|
698 |
+
"layers.78.ffn_norm.weight": "consolidated-00044-of-00051.safetensors",
|
699 |
+
"layers.79.attention.wk.weight": "consolidated-00044-of-00051.safetensors",
|
700 |
+
"layers.79.attention.wo.weight": "consolidated-00045-of-00051.safetensors",
|
701 |
+
"layers.79.attention.wq.weight": "consolidated-00045-of-00051.safetensors",
|
702 |
+
"layers.79.attention.wv.weight": "consolidated-00045-of-00051.safetensors",
|
703 |
+
"layers.79.attention_norm.weight": "consolidated-00045-of-00051.safetensors",
|
704 |
+
"layers.79.feed_forward.w1.weight": "consolidated-00045-of-00051.safetensors",
|
705 |
+
"layers.79.feed_forward.w2.weight": "consolidated-00045-of-00051.safetensors",
|
706 |
+
"layers.79.feed_forward.w3.weight": "consolidated-00045-of-00051.safetensors",
|
707 |
+
"layers.79.ffn_norm.weight": "consolidated-00045-of-00051.safetensors",
|
708 |
+
"layers.8.attention.wk.weight": "consolidated-00045-of-00051.safetensors",
|
709 |
+
"layers.8.attention.wo.weight": "consolidated-00045-of-00051.safetensors",
|
710 |
+
"layers.8.attention.wq.weight": "consolidated-00045-of-00051.safetensors",
|
711 |
+
"layers.8.attention.wv.weight": "consolidated-00045-of-00051.safetensors",
|
712 |
+
"layers.8.attention_norm.weight": "consolidated-00045-of-00051.safetensors",
|
713 |
+
"layers.8.feed_forward.w1.weight": "consolidated-00045-of-00051.safetensors",
|
714 |
+
"layers.8.feed_forward.w2.weight": "consolidated-00045-of-00051.safetensors",
|
715 |
+
"layers.8.feed_forward.w3.weight": "consolidated-00046-of-00051.safetensors",
|
716 |
+
"layers.8.ffn_norm.weight": "consolidated-00046-of-00051.safetensors",
|
717 |
+
"layers.80.attention.wk.weight": "consolidated-00046-of-00051.safetensors",
|
718 |
+
"layers.80.attention.wo.weight": "consolidated-00046-of-00051.safetensors",
|
719 |
+
"layers.80.attention.wq.weight": "consolidated-00046-of-00051.safetensors",
|
720 |
+
"layers.80.attention.wv.weight": "consolidated-00046-of-00051.safetensors",
|
721 |
+
"layers.80.attention_norm.weight": "consolidated-00046-of-00051.safetensors",
|
722 |
+
"layers.80.feed_forward.w1.weight": "consolidated-00046-of-00051.safetensors",
|
723 |
+
"layers.80.feed_forward.w2.weight": "consolidated-00046-of-00051.safetensors",
|
724 |
+
"layers.80.feed_forward.w3.weight": "consolidated-00046-of-00051.safetensors",
|
725 |
+
"layers.80.ffn_norm.weight": "consolidated-00046-of-00051.safetensors",
|
726 |
+
"layers.81.attention.wk.weight": "consolidated-00046-of-00051.safetensors",
|
727 |
+
"layers.81.attention.wo.weight": "consolidated-00046-of-00051.safetensors",
|
728 |
+
"layers.81.attention.wq.weight": "consolidated-00046-of-00051.safetensors",
|
729 |
+
"layers.81.attention.wv.weight": "consolidated-00046-of-00051.safetensors",
|
730 |
+
"layers.81.attention_norm.weight": "consolidated-00046-of-00051.safetensors",
|
731 |
+
"layers.81.feed_forward.w1.weight": "consolidated-00046-of-00051.safetensors",
|
732 |
+
"layers.81.feed_forward.w2.weight": "consolidated-00047-of-00051.safetensors",
|
733 |
+
"layers.81.feed_forward.w3.weight": "consolidated-00047-of-00051.safetensors",
|
734 |
+
"layers.81.ffn_norm.weight": "consolidated-00047-of-00051.safetensors",
|
735 |
+
"layers.82.attention.wk.weight": "consolidated-00047-of-00051.safetensors",
|
736 |
+
"layers.82.attention.wo.weight": "consolidated-00047-of-00051.safetensors",
|
737 |
+
"layers.82.attention.wq.weight": "consolidated-00047-of-00051.safetensors",
|
738 |
+
"layers.82.attention.wv.weight": "consolidated-00047-of-00051.safetensors",
|
739 |
+
"layers.82.attention_norm.weight": "consolidated-00047-of-00051.safetensors",
|
740 |
+
"layers.82.feed_forward.w1.weight": "consolidated-00047-of-00051.safetensors",
|
741 |
+
"layers.82.feed_forward.w2.weight": "consolidated-00047-of-00051.safetensors",
|
742 |
+
"layers.82.feed_forward.w3.weight": "consolidated-00047-of-00051.safetensors",
|
743 |
+
"layers.82.ffn_norm.weight": "consolidated-00047-of-00051.safetensors",
|
744 |
+
"layers.83.attention.wk.weight": "consolidated-00047-of-00051.safetensors",
|
745 |
+
"layers.83.attention.wo.weight": "consolidated-00047-of-00051.safetensors",
|
746 |
+
"layers.83.attention.wq.weight": "consolidated-00047-of-00051.safetensors",
|
747 |
+
"layers.83.attention.wv.weight": "consolidated-00047-of-00051.safetensors",
|
748 |
+
"layers.83.attention_norm.weight": "consolidated-00047-of-00051.safetensors",
|
749 |
+
"layers.83.feed_forward.w1.weight": "consolidated-00048-of-00051.safetensors",
|
750 |
+
"layers.83.feed_forward.w2.weight": "consolidated-00048-of-00051.safetensors",
|
751 |
+
"layers.83.feed_forward.w3.weight": "consolidated-00048-of-00051.safetensors",
|
752 |
+
"layers.83.ffn_norm.weight": "consolidated-00048-of-00051.safetensors",
|
753 |
+
"layers.84.attention.wk.weight": "consolidated-00048-of-00051.safetensors",
|
754 |
+
"layers.84.attention.wo.weight": "consolidated-00048-of-00051.safetensors",
|
755 |
+
"layers.84.attention.wq.weight": "consolidated-00048-of-00051.safetensors",
|
756 |
+
"layers.84.attention.wv.weight": "consolidated-00048-of-00051.safetensors",
|
757 |
+
"layers.84.attention_norm.weight": "consolidated-00048-of-00051.safetensors",
|
758 |
+
"layers.84.feed_forward.w1.weight": "consolidated-00048-of-00051.safetensors",
|
759 |
+
"layers.84.feed_forward.w2.weight": "consolidated-00048-of-00051.safetensors",
|
760 |
+
"layers.84.feed_forward.w3.weight": "consolidated-00048-of-00051.safetensors",
|
761 |
+
"layers.84.ffn_norm.weight": "consolidated-00048-of-00051.safetensors",
|
762 |
+
"layers.85.attention.wk.weight": "consolidated-00048-of-00051.safetensors",
|
763 |
+
"layers.85.attention.wo.weight": "consolidated-00049-of-00051.safetensors",
|
764 |
+
"layers.85.attention.wq.weight": "consolidated-00049-of-00051.safetensors",
|
765 |
+
"layers.85.attention.wv.weight": "consolidated-00049-of-00051.safetensors",
|
766 |
+
"layers.85.attention_norm.weight": "consolidated-00049-of-00051.safetensors",
|
767 |
+
"layers.85.feed_forward.w1.weight": "consolidated-00049-of-00051.safetensors",
|
768 |
+
"layers.85.feed_forward.w2.weight": "consolidated-00049-of-00051.safetensors",
|
769 |
+
"layers.85.feed_forward.w3.weight": "consolidated-00049-of-00051.safetensors",
|
770 |
+
"layers.85.ffn_norm.weight": "consolidated-00049-of-00051.safetensors",
|
771 |
+
"layers.86.attention.wk.weight": "consolidated-00049-of-00051.safetensors",
|
772 |
+
"layers.86.attention.wo.weight": "consolidated-00049-of-00051.safetensors",
|
773 |
+
"layers.86.attention.wq.weight": "consolidated-00049-of-00051.safetensors",
|
774 |
+
"layers.86.attention.wv.weight": "consolidated-00049-of-00051.safetensors",
|
775 |
+
"layers.86.attention_norm.weight": "consolidated-00049-of-00051.safetensors",
|
776 |
+
"layers.86.feed_forward.w1.weight": "consolidated-00049-of-00051.safetensors",
|
777 |
+
"layers.86.feed_forward.w2.weight": "consolidated-00049-of-00051.safetensors",
|
778 |
+
"layers.86.feed_forward.w3.weight": "consolidated-00050-of-00051.safetensors",
|
779 |
+
"layers.86.ffn_norm.weight": "consolidated-00050-of-00051.safetensors",
|
780 |
+
"layers.87.attention.wk.weight": "consolidated-00050-of-00051.safetensors",
|
781 |
+
"layers.87.attention.wo.weight": "consolidated-00050-of-00051.safetensors",
|
782 |
+
"layers.87.attention.wq.weight": "consolidated-00050-of-00051.safetensors",
|
783 |
+
"layers.87.attention.wv.weight": "consolidated-00050-of-00051.safetensors",
|
784 |
+
"layers.87.attention_norm.weight": "consolidated-00050-of-00051.safetensors",
|
785 |
+
"layers.87.feed_forward.w1.weight": "consolidated-00050-of-00051.safetensors",
|
786 |
+
"layers.87.feed_forward.w2.weight": "consolidated-00050-of-00051.safetensors",
|
787 |
+
"layers.87.feed_forward.w3.weight": "consolidated-00050-of-00051.safetensors",
|
788 |
+
"layers.87.ffn_norm.weight": "consolidated-00050-of-00051.safetensors",
|
789 |
+
"layers.9.attention.wk.weight": "consolidated-00050-of-00051.safetensors",
|
790 |
+
"layers.9.attention.wo.weight": "consolidated-00050-of-00051.safetensors",
|
791 |
+
"layers.9.attention.wq.weight": "consolidated-00050-of-00051.safetensors",
|
792 |
+
"layers.9.attention.wv.weight": "consolidated-00050-of-00051.safetensors",
|
793 |
+
"layers.9.attention_norm.weight": "consolidated-00050-of-00051.safetensors",
|
794 |
+
"layers.9.feed_forward.w1.weight": "consolidated-00050-of-00051.safetensors",
|
795 |
+
"layers.9.feed_forward.w2.weight": "consolidated-00051-of-00051.safetensors",
|
796 |
+
"layers.9.feed_forward.w3.weight": "consolidated-00051-of-00051.safetensors",
|
797 |
+
"layers.9.ffn_norm.weight": "consolidated-00051-of-00051.safetensors",
|
798 |
+
"norm.weight": "consolidated-00051-of-00051.safetensors",
|
799 |
+
"output.weight": "consolidated-00051-of-00051.safetensors",
|
800 |
+
"tok_embeddings.weight": "consolidated-00051-of-00051.safetensors"
|
801 |
+
}
|
802 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.46.2"
|
6 |
+
}
|
huggingface-metadata.txt
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
url: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411
|
2 |
+
branch: main
|
3 |
+
download date: 2024-11-18 18:39:37
|
4 |
+
sha256sum:
|
5 |
+
d4a13c7f24603b7bfa0dd2177d19fb15dc17586ad52a8a9cf22b1e6b977636b2 consolidated-00001-of-00051.safetensors
|
6 |
+
dece613ebbcce77e13a4b6b335ff0d77a2f279e1ed0502548ffce8b21397645b consolidated-00002-of-00051.safetensors
|
7 |
+
82609f9909cad709d1232d6c8f1a30585491c89d3f6475499242fdb9d3c964ec consolidated-00003-of-00051.safetensors
|
8 |
+
aaf4d48f04a9c7341ad6236b6267e6325416290422ab9eac84b140f8f27e89c0 consolidated-00004-of-00051.safetensors
|
9 |
+
a7c518166e4a311217a297990e6be200dc884979f8355f03c2925c7247fd3049 consolidated-00005-of-00051.safetensors
|
10 |
+
45b06d9bec5b9c532c84628850fc93a8b1f7d8750858b20b498c8e9293970937 consolidated-00006-of-00051.safetensors
|
11 |
+
d0b663399d881e4768d021c0639489e7b35430d974ec25d004751b832829a356 consolidated-00007-of-00051.safetensors
|
12 |
+
ab3ab184d208ddd442fbe9e509f9edbe9c773a544dd0e622fade3df44845272d consolidated-00008-of-00051.safetensors
|
13 |
+
0608c20a4cae45d763cecc727fec12f933413b34d7bfee690ea24588d3255933 consolidated-00009-of-00051.safetensors
|
14 |
+
6bd8ebdd341e6c93f07ed3fa1c8c96a6b538679050a642b513abcdd7c5aabc13 consolidated-00010-of-00051.safetensors
|
15 |
+
902e8e3ce13ede35ea7b8d23fde69bad78ac94d619eecebcd2519f5acc530a6f consolidated-00011-of-00051.safetensors
|
16 |
+
140c5e4b9dde1a17adc031ddfd8dc8b3b2af99347df8f5cab5fd3b0d295af8ce consolidated-00012-of-00051.safetensors
|
17 |
+
10292ab408be7f56164a7d3d61cd364f88abeb379dea143fcd64322c62e08fdc consolidated-00013-of-00051.safetensors
|
18 |
+
90afec52a08440944a884e88fa52701a07cef59c18b1fe51dcda410dfc067c39 consolidated-00014-of-00051.safetensors
|
19 |
+
40870abb67bc628261fc3ad3bbddede1f46b72d85634f48c7f91ac7519a5519f consolidated-00015-of-00051.safetensors
|
20 |
+
d83573798f812c865d1f7e38f482691af5fe1ea19e32f9483ef9adeb02b64b4c consolidated-00016-of-00051.safetensors
|
21 |
+
7524e0036e5427865f71c7b06d5263bc195b2d6c9e5bf4e3c6c1cbba5bce2771 consolidated-00017-of-00051.safetensors
|
22 |
+
c0cd95aff9ba20324e7e6d839f049f1eaebebcabf4d76ddeecbde5900cfe0c03 consolidated-00018-of-00051.safetensors
|
23 |
+
894e001edf7194d307f0afb435db0d1e879e4ebfb6dc6b6316c292f658f11984 consolidated-00019-of-00051.safetensors
|
24 |
+
52b80d70e6486a71fc77c592749b1e3627c22dd1b0ed68a4880fc2ed9b2352b7 consolidated-00020-of-00051.safetensors
|
25 |
+
585788222e394f05c2277c3957f42bc7da96b784ab7d7a22fd0d1259002034b0 consolidated-00021-of-00051.safetensors
|
26 |
+
04ba1e25b89f89f9ef12712797040f828cdd2fdb032b6987e297b33f16ac8b41 consolidated-00022-of-00051.safetensors
|
27 |
+
9ef8b2d84a76c25027776ed2a02358d4123773effcb12147c201e1c480e0f08f consolidated-00023-of-00051.safetensors
|
28 |
+
32f462852c1fc9967e4e172d8e43a372a1a9164a52c919c4976f0c14b9d3cf5c consolidated-00024-of-00051.safetensors
|
29 |
+
d8670c1d13f8f52dc9142fb95e882e04bf8d4c3352ff1e05d873d9054ebfec72 consolidated-00025-of-00051.safetensors
|
30 |
+
91d9d4b0c4f2ae9759ee4df64bdc0c7589c129130e8b4c777091cbbef3ee6f48 consolidated-00026-of-00051.safetensors
|
31 |
+
433d6171ee812c3b47e330c0a5619c0fb8a8c4985367a8f1754ad71f15c185a8 consolidated-00027-of-00051.safetensors
|
32 |
+
b57b8ed97e6c28074c2eb704ecc133156498b61164827d22b5b0ef89d32e1eff consolidated-00028-of-00051.safetensors
|
33 |
+
076cfd24f8927d1654348e92a56f2dd0ec671242cdec6f33cc354dec06ad71e6 consolidated-00029-of-00051.safetensors
|
34 |
+
93db53e140fb63861eb61fb646e935ad1dd356ba1906dac01925ac861978a7aa consolidated-00030-of-00051.safetensors
|
35 |
+
7395ea79d793047c0fa4dc756034896b49f27401010adb4eb1e66307d502cd7e consolidated-00031-of-00051.safetensors
|
36 |
+
8388940ca5c8565bfbd6460208015ab71dab37d9abca2f7c92ca2d98a616fc54 consolidated-00032-of-00051.safetensors
|
37 |
+
6a5ae12efb8d6f15c324d8272395827e7ca572e1a5d60555b70640190f4d6aaa consolidated-00033-of-00051.safetensors
|
38 |
+
a3c8a851d43b9da4c376b25ad6bfa3724da4a0f930ffeabe7e2db3d4013b9a25 consolidated-00034-of-00051.safetensors
|
39 |
+
3c05ac633d18047a6c95a20f271f8acf16faa8a689ac9c347f20ca334feb7224 consolidated-00035-of-00051.safetensors
|
40 |
+
3c5baec25ecdc5853a8194309ec155d361be9fd78410ba7a0ed30d7027d1dc19 consolidated-00036-of-00051.safetensors
|
41 |
+
0ad0b601d08f70451c24cf39bdbb38a9b1d433cff7be805a56eabd12be0887f4 consolidated-00037-of-00051.safetensors
|
42 |
+
58ecd2d47c53c6132f25aef6744798fe48eca456b34b29519d970caf92f14415 consolidated-00038-of-00051.safetensors
|
43 |
+
3dc879623488fb451b41139f428f698a409766e7aa25a8e0e301f28a55907d4e consolidated-00039-of-00051.safetensors
|
44 |
+
b34a2febab3529b2093b002ed05e87334d58b2a78176532f6e70121d0915dd75 consolidated-00040-of-00051.safetensors
|
45 |
+
c6cb502eaf2d70333f937fa294be426a332aa144697fb0c73a1ea4050bcdac51 consolidated-00041-of-00051.safetensors
|
46 |
+
e4110d79e5ce5735c778f1113901f7716fb05661f7cd29e85979283f8f1c10b6 consolidated-00042-of-00051.safetensors
|
47 |
+
6ac53ebead2174ec2d36e3e9354bc2ae4da5ccccad938ff0d494196c92ecf58d consolidated-00043-of-00051.safetensors
|
48 |
+
36d97f17c0a08547c3387b973c8ff47b2f956e9b8b063f3bd9520effd3a97607 consolidated-00044-of-00051.safetensors
|
49 |
+
546a3e88194fe986025a6bb747d0fd9daa980e4540050744b005f8fb452b5c30 consolidated-00045-of-00051.safetensors
|
50 |
+
16029249bb10a6b8d8bbd5a5b1a69d9330241e1a1b7d70b9730ef6a46f680426 consolidated-00046-of-00051.safetensors
|
51 |
+
3346fa628f5e217e063928c09f6ff14e90f0bf706b77e37bd96cc235e030fc92 consolidated-00047-of-00051.safetensors
|
52 |
+
1b05de28fd505d6f09c6ce9407faff25989d64795aab6fbad0a6547890000797 consolidated-00048-of-00051.safetensors
|
53 |
+
a7a22d74a6bab606986c138322f17fbd8c772212953a01fbb5d4e6e9a6aa9513 consolidated-00049-of-00051.safetensors
|
54 |
+
c64b1e6151423f2ac1c397eac823b2474b4b7345516466f6f8aadf603cb0e48b consolidated-00050-of-00051.safetensors
|
55 |
+
fcfe5a56ff3d9647743f38fff861ca1eee1f991631032d780fb2e1ebccc5a5e3 consolidated-00051-of-00051.safetensors
|
56 |
+
03037b373b02b533f9df9b0bccd36f5d90e377fdd513b96c5973d3096dd8d387 model-00001-of-00051.safetensors
|
57 |
+
9d707f1d9c6acb527d3acbadf242785a2e060c4b71ecb57a36851e1d725755d4 model-00002-of-00051.safetensors
|
58 |
+
42b237bac67998febdf6ad935ffb6c48354ef6619723b81bdfbc291c318c3c25 model-00003-of-00051.safetensors
|
59 |
+
9f2da3c583779de5062114dc0540c3b980e2a2b40ba8885c76362abd39ce8b33 model-00004-of-00051.safetensors
|
60 |
+
d333cdd3c1eb1b26be345c1c5a216225130ed3cf5a6705fdf033234ec9ba7289 model-00005-of-00051.safetensors
|
61 |
+
c2dbb446a68bbe88261f65bac5d3f48883ba6bf91aa944dee934092c970f2f8f model-00006-of-00051.safetensors
|
62 |
+
326603e9b90998674924647a57433afbab99fca96a68e0a198cb00211d229246 model-00007-of-00051.safetensors
|
63 |
+
e9a0d69218e0e6f5a344aba1370e72fa74bf6a9af7f8f2bff57240d9a5b6f20c model-00008-of-00051.safetensors
|
64 |
+
60faaec055399bc222e31b906ba8106c755900545da2ef8406a6027901037c71 model-00009-of-00051.safetensors
|
65 |
+
07b16e734d0c8ab9fb4e34b5c4ea2b2c661f65d399c6701f386c3c9a6c6c8654 model-00010-of-00051.safetensors
|
66 |
+
e14440ccfabcf6f6c26c7d59e4a7c3c7ac612dca6eb2d41a403c01fd4cdf282f model-00011-of-00051.safetensors
|
67 |
+
a027915856a530dd45467e59b370cfd01927cea2107a60f8146e6faaa0d076bc model-00012-of-00051.safetensors
|
68 |
+
3bb7951d0d8885e1921b5b1b6d4f2bac1950a918cdb92fba93863770fa076a76 model-00013-of-00051.safetensors
|
69 |
+
d5d02352ab0a61e20754abdf5ef08a17531ef959e62003656ed97166b23407db model-00014-of-00051.safetensors
|
70 |
+
7de8d19f23007f7b4a41482041672c145d858e68f484f5fc70cd01e73d14ce32 model-00015-of-00051.safetensors
|
71 |
+
21236191524fe50c5531b3343484cacf7da4764619984a8a06b4cfd7ea3105c3 model-00016-of-00051.safetensors
|
72 |
+
fa7a70677bc9dac496097cbdb1b3e0e5539e38cc039a70eaa696514868669968 model-00017-of-00051.safetensors
|
73 |
+
ac0ac15e621af7d25b93d3e738e09a00f12295b4ca0de4b51463eb84aa22c2c2 model-00018-of-00051.safetensors
|
74 |
+
b8e1fc69bb1fbbaa5ee5275b5c06fa8430c175d615713a8cf3954da723e249cd model-00019-of-00051.safetensors
|
75 |
+
5afcf48b80f8d12dda9865cf56a1a2b896d38b666dc3947230ff1f6c75975ea1 model-00020-of-00051.safetensors
|
76 |
+
65bb171ed81c96ba4abf7b0cdeb2acb453ec3b41191ff8348b7c15860aad98a6 model-00021-of-00051.safetensors
|
77 |
+
5a3df5d20923867e2f6151e246f9727c69624ad27cd5ce55caa1372cf4a2fe35 model-00022-of-00051.safetensors
|
78 |
+
4c15fec1fd5007479344bd40278bdbfec2d8e70fbfbc23e914b48791e3066e17 model-00023-of-00051.safetensors
|
79 |
+
611b70f864580c0541d84727dd9367a7633e5394ed7b0362f3959eed9173391d model-00024-of-00051.safetensors
|
80 |
+
e95195c4768d55bc6e51a26240c216ecc96b6c8fc604172bd51744476cf55b22 model-00025-of-00051.safetensors
|
81 |
+
3ce68cbeee64f88cf8d233ed9a619a74292df0a347eb97c0654cd78b600f2a79 model-00026-of-00051.safetensors
|
82 |
+
e4c06c7c5a0e33054e9b2340894666d547efbbb3ba45d4516e6f4bec2234a068 model-00027-of-00051.safetensors
|
83 |
+
b0f78c9389c7d44616efa7b719659188ca0c36f3fd33d6b1f456a6b6227e3d86 model-00028-of-00051.safetensors
|
84 |
+
eef849759cd3b0d5386dce808b473e95bc0d5e87c744463844b311a4049045f7 model-00029-of-00051.safetensors
|
85 |
+
8d08971298f860a865999b3c775c12bbc46c3ddd930269e9b0ce7aea8717d2a7 model-00030-of-00051.safetensors
|
86 |
+
4dcae2f1b579a80faa4c88166dfff6cba4f80ac6edfebda6d73dc5a1aa6ccb81 model-00031-of-00051.safetensors
|
87 |
+
d3c878477a3823af44464f0962ea115d34fcc78442f69e045e59d3f7d8f5764c model-00032-of-00051.safetensors
|
88 |
+
2e40452b3fa854c6e6bd31c037ff5ef4e0d784fd2e0f672cfe1ab27f6d69b11c model-00033-of-00051.safetensors
|
89 |
+
2f582456d903d4d8a9f6979e21461cc3ae925b8a4483ab587d4567b9fedadd11 model-00034-of-00051.safetensors
|
90 |
+
7026c7ade3ee95cb8a9080efe91757b8e403d6cb3327780bfc4a0e852fdd4cd7 model-00035-of-00051.safetensors
|
91 |
+
de57a0990992e1f5d20a0021d02ef1ee82dccb07b2c5ecce3c03e1b128499efb model-00036-of-00051.safetensors
|
92 |
+
1b4e637d1a2b93bcbdef18ad5d3ba3e174be6f4dd21ba30bcfd8924cbc55ffc8 model-00037-of-00051.safetensors
|
93 |
+
013776dd6397af1a2e0feddc56203eaf82cd6ae0f5daf5e9c56efb0edb2b351e model-00038-of-00051.safetensors
|
94 |
+
3af9ee9a8209cd0d2284485fa07e4db89c9c7b2846a544fe06d382d9d12d733f model-00039-of-00051.safetensors
|
95 |
+
41587c00135a4cb92ed909d84d2ee153858ac449ca2bc1fddf6c0d6b3876f5f2 model-00040-of-00051.safetensors
|
96 |
+
be9d72bac4295280e293fd12314c6fb85bb9b601c32bcbb411e69eb7cf2ac2fd model-00041-of-00051.safetensors
|
97 |
+
5eeb4b982d6565cb9d3be46c4283cc846bb4620f58aedee0b07d450b798db333 model-00042-of-00051.safetensors
|
98 |
+
f208789ce6c1289a09d140a4a030d5c8b75d8bd6e9a2470780c14b14d1f59a54 model-00043-of-00051.safetensors
|
99 |
+
c66a6bf2a7f31948a2f8914198942b468bc195046d9acc0dcfcdd0233ba052d0 model-00044-of-00051.safetensors
|
100 |
+
9b2a13ae735dd6e92a78baf908711fecf36bfb8381f7129e44c6e82cc20ebff6 model-00045-of-00051.safetensors
|
101 |
+
3fdf047279cd31f4787a3b025a2542ef3cac15807084b00f94fff73fcf4b58e2 model-00046-of-00051.safetensors
|
102 |
+
1134b1331972ac186af9c89d06f516d4ea08a229b53372990b00567a3955c87e model-00047-of-00051.safetensors
|
103 |
+
c90053f320297b4ecb8c7342c676051a8f5958a7c353128d61e86d2cde925008 model-00048-of-00051.safetensors
|
104 |
+
924caf107080281568674a31116ec4a8fa0634917340bdab30530ad9eac42d10 model-00049-of-00051.safetensors
|
105 |
+
cbb97b8db4380aeb5a234a65a5426233c86130e3323eef41cca545add499a336 model-00050-of-00051.safetensors
|
106 |
+
560af5bd84f90b0153b41366870bae43d55f4a0e9e5ddacbf3de3032abf3c4e2 model-00051-of-00051.safetensors
|
107 |
+
1b968b8dc352f42192367337c78ccc61e1eaddc6d641a579372d4f20694beb7a tokenizer.model
|
measurement.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors.index.json
ADDED
@@ -0,0 +1,802 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 245220139008
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"lm_head.weight": "model-00051-of-00051.safetensors",
|
7 |
+
"model.embed_tokens.weight": "model-00001-of-00051.safetensors",
|
8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00051.safetensors",
|
9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00051.safetensors",
|
10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00051.safetensors",
|
11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00051.safetensors",
|
12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00051.safetensors",
|
13 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00051.safetensors",
|
14 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00051.safetensors",
|
15 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00051.safetensors",
|
16 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00051.safetensors",
|
17 |
+
"model.layers.1.input_layernorm.weight": "model-00002-of-00051.safetensors",
|
18 |
+
"model.layers.1.mlp.down_proj.weight": "model-00002-of-00051.safetensors",
|
19 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00051.safetensors",
|
20 |
+
"model.layers.1.mlp.up_proj.weight": "model-00002-of-00051.safetensors",
|
21 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00002-of-00051.safetensors",
|
22 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00051.safetensors",
|
23 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00051.safetensors",
|
24 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00051.safetensors",
|
25 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00051.safetensors",
|
26 |
+
"model.layers.10.input_layernorm.weight": "model-00007-of-00051.safetensors",
|
27 |
+
"model.layers.10.mlp.down_proj.weight": "model-00007-of-00051.safetensors",
|
28 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00007-of-00051.safetensors",
|
29 |
+
"model.layers.10.mlp.up_proj.weight": "model-00007-of-00051.safetensors",
|
30 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00007-of-00051.safetensors",
|
31 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00006-of-00051.safetensors",
|
32 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00006-of-00051.safetensors",
|
33 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00006-of-00051.safetensors",
|
34 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00006-of-00051.safetensors",
|
35 |
+
"model.layers.11.input_layernorm.weight": "model-00007-of-00051.safetensors",
|
36 |
+
"model.layers.11.mlp.down_proj.weight": "model-00007-of-00051.safetensors",
|
37 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00007-of-00051.safetensors",
|
38 |
+
"model.layers.11.mlp.up_proj.weight": "model-00007-of-00051.safetensors",
|
39 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00007-of-00051.safetensors",
|
40 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00007-of-00051.safetensors",
|
41 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00007-of-00051.safetensors",
|
42 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00007-of-00051.safetensors",
|
43 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00007-of-00051.safetensors",
|
44 |
+
"model.layers.12.input_layernorm.weight": "model-00008-of-00051.safetensors",
|
45 |
+
"model.layers.12.mlp.down_proj.weight": "model-00008-of-00051.safetensors",
|
46 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00008-of-00051.safetensors",
|
47 |
+
"model.layers.12.mlp.up_proj.weight": "model-00008-of-00051.safetensors",
|
48 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00008-of-00051.safetensors",
|
49 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00008-of-00051.safetensors",
|
50 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00008-of-00051.safetensors",
|
51 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00008-of-00051.safetensors",
|
52 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00008-of-00051.safetensors",
|
53 |
+
"model.layers.13.input_layernorm.weight": "model-00009-of-00051.safetensors",
|
54 |
+
"model.layers.13.mlp.down_proj.weight": "model-00009-of-00051.safetensors",
|
55 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00008-of-00051.safetensors",
|
56 |
+
"model.layers.13.mlp.up_proj.weight": "model-00008-of-00051.safetensors",
|
57 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00009-of-00051.safetensors",
|
58 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00008-of-00051.safetensors",
|
59 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00008-of-00051.safetensors",
|
60 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00008-of-00051.safetensors",
|
61 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00008-of-00051.safetensors",
|
62 |
+
"model.layers.14.input_layernorm.weight": "model-00009-of-00051.safetensors",
|
63 |
+
"model.layers.14.mlp.down_proj.weight": "model-00009-of-00051.safetensors",
|
64 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00009-of-00051.safetensors",
|
65 |
+
"model.layers.14.mlp.up_proj.weight": "model-00009-of-00051.safetensors",
|
66 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00009-of-00051.safetensors",
|
67 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00009-of-00051.safetensors",
|
68 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00009-of-00051.safetensors",
|
69 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00009-of-00051.safetensors",
|
70 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00009-of-00051.safetensors",
|
71 |
+
"model.layers.15.input_layernorm.weight": "model-00010-of-00051.safetensors",
|
72 |
+
"model.layers.15.mlp.down_proj.weight": "model-00010-of-00051.safetensors",
|
73 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00009-of-00051.safetensors",
|
74 |
+
"model.layers.15.mlp.up_proj.weight": "model-00010-of-00051.safetensors",
|
75 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00010-of-00051.safetensors",
|
76 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00009-of-00051.safetensors",
|
77 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00009-of-00051.safetensors",
|
78 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00009-of-00051.safetensors",
|
79 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00009-of-00051.safetensors",
|
80 |
+
"model.layers.16.input_layernorm.weight": "model-00010-of-00051.safetensors",
|
81 |
+
"model.layers.16.mlp.down_proj.weight": "model-00010-of-00051.safetensors",
|
82 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00010-of-00051.safetensors",
|
83 |
+
"model.layers.16.mlp.up_proj.weight": "model-00010-of-00051.safetensors",
|
84 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00010-of-00051.safetensors",
|
85 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00010-of-00051.safetensors",
|
86 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00010-of-00051.safetensors",
|
87 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00010-of-00051.safetensors",
|
88 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00010-of-00051.safetensors",
|
89 |
+
"model.layers.17.input_layernorm.weight": "model-00011-of-00051.safetensors",
|
90 |
+
"model.layers.17.mlp.down_proj.weight": "model-00011-of-00051.safetensors",
|
91 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00011-of-00051.safetensors",
|
92 |
+
"model.layers.17.mlp.up_proj.weight": "model-00011-of-00051.safetensors",
|
93 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00011-of-00051.safetensors",
|
94 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00010-of-00051.safetensors",
|
95 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00010-of-00051.safetensors",
|
96 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00010-of-00051.safetensors",
|
97 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00010-of-00051.safetensors",
|
98 |
+
"model.layers.18.input_layernorm.weight": "model-00011-of-00051.safetensors",
|
99 |
+
"model.layers.18.mlp.down_proj.weight": "model-00011-of-00051.safetensors",
|
100 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00011-of-00051.safetensors",
|
101 |
+
"model.layers.18.mlp.up_proj.weight": "model-00011-of-00051.safetensors",
|
102 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00011-of-00051.safetensors",
|
103 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00011-of-00051.safetensors",
|
104 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00011-of-00051.safetensors",
|
105 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00011-of-00051.safetensors",
|
106 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00011-of-00051.safetensors",
|
107 |
+
"model.layers.19.input_layernorm.weight": "model-00012-of-00051.safetensors",
|
108 |
+
"model.layers.19.mlp.down_proj.weight": "model-00012-of-00051.safetensors",
|
109 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00012-of-00051.safetensors",
|
110 |
+
"model.layers.19.mlp.up_proj.weight": "model-00012-of-00051.safetensors",
|
111 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00012-of-00051.safetensors",
|
112 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00012-of-00051.safetensors",
|
113 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00012-of-00051.safetensors",
|
114 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00012-of-00051.safetensors",
|
115 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00012-of-00051.safetensors",
|
116 |
+
"model.layers.2.input_layernorm.weight": "model-00002-of-00051.safetensors",
|
117 |
+
"model.layers.2.mlp.down_proj.weight": "model-00002-of-00051.safetensors",
|
118 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00002-of-00051.safetensors",
|
119 |
+
"model.layers.2.mlp.up_proj.weight": "model-00002-of-00051.safetensors",
|
120 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00002-of-00051.safetensors",
|
121 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00002-of-00051.safetensors",
|
122 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00051.safetensors",
|
123 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00002-of-00051.safetensors",
|
124 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00051.safetensors",
|
125 |
+
"model.layers.20.input_layernorm.weight": "model-00013-of-00051.safetensors",
|
126 |
+
"model.layers.20.mlp.down_proj.weight": "model-00013-of-00051.safetensors",
|
127 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00012-of-00051.safetensors",
|
128 |
+
"model.layers.20.mlp.up_proj.weight": "model-00012-of-00051.safetensors",
|
129 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00013-of-00051.safetensors",
|
130 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00012-of-00051.safetensors",
|
131 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00012-of-00051.safetensors",
|
132 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00012-of-00051.safetensors",
|
133 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00012-of-00051.safetensors",
|
134 |
+
"model.layers.21.input_layernorm.weight": "model-00013-of-00051.safetensors",
|
135 |
+
"model.layers.21.mlp.down_proj.weight": "model-00013-of-00051.safetensors",
|
136 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00013-of-00051.safetensors",
|
137 |
+
"model.layers.21.mlp.up_proj.weight": "model-00013-of-00051.safetensors",
|
138 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00013-of-00051.safetensors",
|
139 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00013-of-00051.safetensors",
|
140 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00013-of-00051.safetensors",
|
141 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00013-of-00051.safetensors",
|
142 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00013-of-00051.safetensors",
|
143 |
+
"model.layers.22.input_layernorm.weight": "model-00014-of-00051.safetensors",
|
144 |
+
"model.layers.22.mlp.down_proj.weight": "model-00014-of-00051.safetensors",
|
145 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00013-of-00051.safetensors",
|
146 |
+
"model.layers.22.mlp.up_proj.weight": "model-00014-of-00051.safetensors",
|
147 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00014-of-00051.safetensors",
|
148 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00013-of-00051.safetensors",
|
149 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00013-of-00051.safetensors",
|
150 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00013-of-00051.safetensors",
|
151 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00013-of-00051.safetensors",
|
152 |
+
"model.layers.23.input_layernorm.weight": "model-00014-of-00051.safetensors",
|
153 |
+
"model.layers.23.mlp.down_proj.weight": "model-00014-of-00051.safetensors",
|
154 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00014-of-00051.safetensors",
|
155 |
+
"model.layers.23.mlp.up_proj.weight": "model-00014-of-00051.safetensors",
|
156 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00014-of-00051.safetensors",
|
157 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00014-of-00051.safetensors",
|
158 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00014-of-00051.safetensors",
|
159 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00014-of-00051.safetensors",
|
160 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00014-of-00051.safetensors",
|
161 |
+
"model.layers.24.input_layernorm.weight": "model-00015-of-00051.safetensors",
|
162 |
+
"model.layers.24.mlp.down_proj.weight": "model-00015-of-00051.safetensors",
|
163 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00015-of-00051.safetensors",
|
164 |
+
"model.layers.24.mlp.up_proj.weight": "model-00015-of-00051.safetensors",
|
165 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00015-of-00051.safetensors",
|
166 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00014-of-00051.safetensors",
|
167 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00014-of-00051.safetensors",
|
168 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00014-of-00051.safetensors",
|
169 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00014-of-00051.safetensors",
|
170 |
+
"model.layers.25.input_layernorm.weight": "model-00015-of-00051.safetensors",
|
171 |
+
"model.layers.25.mlp.down_proj.weight": "model-00015-of-00051.safetensors",
|
172 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00015-of-00051.safetensors",
|
173 |
+
"model.layers.25.mlp.up_proj.weight": "model-00015-of-00051.safetensors",
|
174 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00015-of-00051.safetensors",
|
175 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00015-of-00051.safetensors",
|
176 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00015-of-00051.safetensors",
|
177 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00015-of-00051.safetensors",
|
178 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00015-of-00051.safetensors",
|
179 |
+
"model.layers.26.input_layernorm.weight": "model-00016-of-00051.safetensors",
|
180 |
+
"model.layers.26.mlp.down_proj.weight": "model-00016-of-00051.safetensors",
|
181 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00016-of-00051.safetensors",
|
182 |
+
"model.layers.26.mlp.up_proj.weight": "model-00016-of-00051.safetensors",
|
183 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00016-of-00051.safetensors",
|
184 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00016-of-00051.safetensors",
|
185 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00016-of-00051.safetensors",
|
186 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00016-of-00051.safetensors",
|
187 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00016-of-00051.safetensors",
|
188 |
+
"model.layers.27.input_layernorm.weight": "model-00017-of-00051.safetensors",
|
189 |
+
"model.layers.27.mlp.down_proj.weight": "model-00017-of-00051.safetensors",
|
190 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00016-of-00051.safetensors",
|
191 |
+
"model.layers.27.mlp.up_proj.weight": "model-00016-of-00051.safetensors",
|
192 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00017-of-00051.safetensors",
|
193 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00016-of-00051.safetensors",
|
194 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00016-of-00051.safetensors",
|
195 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00016-of-00051.safetensors",
|
196 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00016-of-00051.safetensors",
|
197 |
+
"model.layers.28.input_layernorm.weight": "model-00017-of-00051.safetensors",
|
198 |
+
"model.layers.28.mlp.down_proj.weight": "model-00017-of-00051.safetensors",
|
199 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00017-of-00051.safetensors",
|
200 |
+
"model.layers.28.mlp.up_proj.weight": "model-00017-of-00051.safetensors",
|
201 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00017-of-00051.safetensors",
|
202 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00017-of-00051.safetensors",
|
203 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00017-of-00051.safetensors",
|
204 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00017-of-00051.safetensors",
|
205 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00017-of-00051.safetensors",
|
206 |
+
"model.layers.29.input_layernorm.weight": "model-00018-of-00051.safetensors",
|
207 |
+
"model.layers.29.mlp.down_proj.weight": "model-00018-of-00051.safetensors",
|
208 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00017-of-00051.safetensors",
|
209 |
+
"model.layers.29.mlp.up_proj.weight": "model-00018-of-00051.safetensors",
|
210 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00018-of-00051.safetensors",
|
211 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00017-of-00051.safetensors",
|
212 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00017-of-00051.safetensors",
|
213 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00017-of-00051.safetensors",
|
214 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00017-of-00051.safetensors",
|
215 |
+
"model.layers.3.input_layernorm.weight": "model-00003-of-00051.safetensors",
|
216 |
+
"model.layers.3.mlp.down_proj.weight": "model-00003-of-00051.safetensors",
|
217 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00003-of-00051.safetensors",
|
218 |
+
"model.layers.3.mlp.up_proj.weight": "model-00003-of-00051.safetensors",
|
219 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00003-of-00051.safetensors",
|
220 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00002-of-00051.safetensors",
|
221 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00002-of-00051.safetensors",
|
222 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00002-of-00051.safetensors",
|
223 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00002-of-00051.safetensors",
|
224 |
+
"model.layers.30.input_layernorm.weight": "model-00018-of-00051.safetensors",
|
225 |
+
"model.layers.30.mlp.down_proj.weight": "model-00018-of-00051.safetensors",
|
226 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00018-of-00051.safetensors",
|
227 |
+
"model.layers.30.mlp.up_proj.weight": "model-00018-of-00051.safetensors",
|
228 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00018-of-00051.safetensors",
|
229 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00018-of-00051.safetensors",
|
230 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00018-of-00051.safetensors",
|
231 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00018-of-00051.safetensors",
|
232 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00018-of-00051.safetensors",
|
233 |
+
"model.layers.31.input_layernorm.weight": "model-00019-of-00051.safetensors",
|
234 |
+
"model.layers.31.mlp.down_proj.weight": "model-00019-of-00051.safetensors",
|
235 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00019-of-00051.safetensors",
|
236 |
+
"model.layers.31.mlp.up_proj.weight": "model-00019-of-00051.safetensors",
|
237 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00019-of-00051.safetensors",
|
238 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00018-of-00051.safetensors",
|
239 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00018-of-00051.safetensors",
|
240 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00018-of-00051.safetensors",
|
241 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00018-of-00051.safetensors",
|
242 |
+
"model.layers.32.input_layernorm.weight": "model-00019-of-00051.safetensors",
|
243 |
+
"model.layers.32.mlp.down_proj.weight": "model-00019-of-00051.safetensors",
|
244 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00019-of-00051.safetensors",
|
245 |
+
"model.layers.32.mlp.up_proj.weight": "model-00019-of-00051.safetensors",
|
246 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00019-of-00051.safetensors",
|
247 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00019-of-00051.safetensors",
|
248 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00019-of-00051.safetensors",
|
249 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00019-of-00051.safetensors",
|
250 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00019-of-00051.safetensors",
|
251 |
+
"model.layers.33.input_layernorm.weight": "model-00020-of-00051.safetensors",
|
252 |
+
"model.layers.33.mlp.down_proj.weight": "model-00020-of-00051.safetensors",
|
253 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00020-of-00051.safetensors",
|
254 |
+
"model.layers.33.mlp.up_proj.weight": "model-00020-of-00051.safetensors",
|
255 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00020-of-00051.safetensors",
|
256 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00020-of-00051.safetensors",
|
257 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00020-of-00051.safetensors",
|
258 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00020-of-00051.safetensors",
|
259 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00020-of-00051.safetensors",
|
260 |
+
"model.layers.34.input_layernorm.weight": "model-00021-of-00051.safetensors",
|
261 |
+
"model.layers.34.mlp.down_proj.weight": "model-00021-of-00051.safetensors",
|
262 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00020-of-00051.safetensors",
|
263 |
+
"model.layers.34.mlp.up_proj.weight": "model-00020-of-00051.safetensors",
|
264 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00021-of-00051.safetensors",
|
265 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00020-of-00051.safetensors",
|
266 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00020-of-00051.safetensors",
|
267 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00020-of-00051.safetensors",
|
268 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00020-of-00051.safetensors",
|
269 |
+
"model.layers.35.input_layernorm.weight": "model-00021-of-00051.safetensors",
|
270 |
+
"model.layers.35.mlp.down_proj.weight": "model-00021-of-00051.safetensors",
|
271 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00021-of-00051.safetensors",
|
272 |
+
"model.layers.35.mlp.up_proj.weight": "model-00021-of-00051.safetensors",
|
273 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00021-of-00051.safetensors",
|
274 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00021-of-00051.safetensors",
|
275 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00021-of-00051.safetensors",
|
276 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00021-of-00051.safetensors",
|
277 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00021-of-00051.safetensors",
|
278 |
+
"model.layers.36.input_layernorm.weight": "model-00022-of-00051.safetensors",
|
279 |
+
"model.layers.36.mlp.down_proj.weight": "model-00022-of-00051.safetensors",
|
280 |
+
"model.layers.36.mlp.gate_proj.weight": "model-00021-of-00051.safetensors",
|
281 |
+
"model.layers.36.mlp.up_proj.weight": "model-00022-of-00051.safetensors",
|
282 |
+
"model.layers.36.post_attention_layernorm.weight": "model-00022-of-00051.safetensors",
|
283 |
+
"model.layers.36.self_attn.k_proj.weight": "model-00021-of-00051.safetensors",
|
284 |
+
"model.layers.36.self_attn.o_proj.weight": "model-00021-of-00051.safetensors",
|
285 |
+
"model.layers.36.self_attn.q_proj.weight": "model-00021-of-00051.safetensors",
|
286 |
+
"model.layers.36.self_attn.v_proj.weight": "model-00021-of-00051.safetensors",
|
287 |
+
"model.layers.37.input_layernorm.weight": "model-00022-of-00051.safetensors",
|
288 |
+
"model.layers.37.mlp.down_proj.weight": "model-00022-of-00051.safetensors",
|
289 |
+
"model.layers.37.mlp.gate_proj.weight": "model-00022-of-00051.safetensors",
|
290 |
+
"model.layers.37.mlp.up_proj.weight": "model-00022-of-00051.safetensors",
|
291 |
+
"model.layers.37.post_attention_layernorm.weight": "model-00022-of-00051.safetensors",
|
292 |
+
"model.layers.37.self_attn.k_proj.weight": "model-00022-of-00051.safetensors",
|
293 |
+
"model.layers.37.self_attn.o_proj.weight": "model-00022-of-00051.safetensors",
|
294 |
+
"model.layers.37.self_attn.q_proj.weight": "model-00022-of-00051.safetensors",
|
295 |
+
"model.layers.37.self_attn.v_proj.weight": "model-00022-of-00051.safetensors",
|
296 |
+
"model.layers.38.input_layernorm.weight": "model-00023-of-00051.safetensors",
|
297 |
+
"model.layers.38.mlp.down_proj.weight": "model-00023-of-00051.safetensors",
|
298 |
+
"model.layers.38.mlp.gate_proj.weight": "model-00023-of-00051.safetensors",
|
299 |
+
"model.layers.38.mlp.up_proj.weight": "model-00023-of-00051.safetensors",
|
300 |
+
"model.layers.38.post_attention_layernorm.weight": "model-00023-of-00051.safetensors",
|
301 |
+
"model.layers.38.self_attn.k_proj.weight": "model-00022-of-00051.safetensors",
|
302 |
+
"model.layers.38.self_attn.o_proj.weight": "model-00022-of-00051.safetensors",
|
303 |
+
"model.layers.38.self_attn.q_proj.weight": "model-00022-of-00051.safetensors",
|
304 |
+
"model.layers.38.self_attn.v_proj.weight": "model-00022-of-00051.safetensors",
|
305 |
+
"model.layers.39.input_layernorm.weight": "model-00023-of-00051.safetensors",
|
306 |
+
"model.layers.39.mlp.down_proj.weight": "model-00023-of-00051.safetensors",
|
307 |
+
"model.layers.39.mlp.gate_proj.weight": "model-00023-of-00051.safetensors",
|
308 |
+
"model.layers.39.mlp.up_proj.weight": "model-00023-of-00051.safetensors",
|
309 |
+
"model.layers.39.post_attention_layernorm.weight": "model-00023-of-00051.safetensors",
|
310 |
+
"model.layers.39.self_attn.k_proj.weight": "model-00023-of-00051.safetensors",
|
311 |
+
"model.layers.39.self_attn.o_proj.weight": "model-00023-of-00051.safetensors",
|
312 |
+
"model.layers.39.self_attn.q_proj.weight": "model-00023-of-00051.safetensors",
|
313 |
+
"model.layers.39.self_attn.v_proj.weight": "model-00023-of-00051.safetensors",
|
314 |
+
"model.layers.4.input_layernorm.weight": "model-00003-of-00051.safetensors",
|
315 |
+
"model.layers.4.mlp.down_proj.weight": "model-00003-of-00051.safetensors",
|
316 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00003-of-00051.safetensors",
|
317 |
+
"model.layers.4.mlp.up_proj.weight": "model-00003-of-00051.safetensors",
|
318 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00003-of-00051.safetensors",
|
319 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00003-of-00051.safetensors",
|
320 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00003-of-00051.safetensors",
|
321 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00003-of-00051.safetensors",
|
322 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00003-of-00051.safetensors",
|
323 |
+
"model.layers.40.input_layernorm.weight": "model-00024-of-00051.safetensors",
|
324 |
+
"model.layers.40.mlp.down_proj.weight": "model-00024-of-00051.safetensors",
|
325 |
+
"model.layers.40.mlp.gate_proj.weight": "model-00024-of-00051.safetensors",
|
326 |
+
"model.layers.40.mlp.up_proj.weight": "model-00024-of-00051.safetensors",
|
327 |
+
"model.layers.40.post_attention_layernorm.weight": "model-00024-of-00051.safetensors",
|
328 |
+
"model.layers.40.self_attn.k_proj.weight": "model-00024-of-00051.safetensors",
|
329 |
+
"model.layers.40.self_attn.o_proj.weight": "model-00024-of-00051.safetensors",
|
330 |
+
"model.layers.40.self_attn.q_proj.weight": "model-00024-of-00051.safetensors",
|
331 |
+
"model.layers.40.self_attn.v_proj.weight": "model-00024-of-00051.safetensors",
|
332 |
+
"model.layers.41.input_layernorm.weight": "model-00025-of-00051.safetensors",
|
333 |
+
"model.layers.41.mlp.down_proj.weight": "model-00025-of-00051.safetensors",
|
334 |
+
"model.layers.41.mlp.gate_proj.weight": "model-00024-of-00051.safetensors",
|
335 |
+
"model.layers.41.mlp.up_proj.weight": "model-00024-of-00051.safetensors",
|
336 |
+
"model.layers.41.post_attention_layernorm.weight": "model-00025-of-00051.safetensors",
|
337 |
+
"model.layers.41.self_attn.k_proj.weight": "model-00024-of-00051.safetensors",
|
338 |
+
"model.layers.41.self_attn.o_proj.weight": "model-00024-of-00051.safetensors",
|
339 |
+
"model.layers.41.self_attn.q_proj.weight": "model-00024-of-00051.safetensors",
|
340 |
+
"model.layers.41.self_attn.v_proj.weight": "model-00024-of-00051.safetensors",
|
341 |
+
"model.layers.42.input_layernorm.weight": "model-00025-of-00051.safetensors",
|
342 |
+
"model.layers.42.mlp.down_proj.weight": "model-00025-of-00051.safetensors",
|
343 |
+
"model.layers.42.mlp.gate_proj.weight": "model-00025-of-00051.safetensors",
|
344 |
+
"model.layers.42.mlp.up_proj.weight": "model-00025-of-00051.safetensors",
|
345 |
+
"model.layers.42.post_attention_layernorm.weight": "model-00025-of-00051.safetensors",
|
346 |
+
"model.layers.42.self_attn.k_proj.weight": "model-00025-of-00051.safetensors",
|
347 |
+
"model.layers.42.self_attn.o_proj.weight": "model-00025-of-00051.safetensors",
|
348 |
+
"model.layers.42.self_attn.q_proj.weight": "model-00025-of-00051.safetensors",
|
349 |
+
"model.layers.42.self_attn.v_proj.weight": "model-00025-of-00051.safetensors",
|
350 |
+
"model.layers.43.input_layernorm.weight": "model-00026-of-00051.safetensors",
|
351 |
+
"model.layers.43.mlp.down_proj.weight": "model-00026-of-00051.safetensors",
|
352 |
+
"model.layers.43.mlp.gate_proj.weight": "model-00025-of-00051.safetensors",
|
353 |
+
"model.layers.43.mlp.up_proj.weight": "model-00026-of-00051.safetensors",
|
354 |
+
"model.layers.43.post_attention_layernorm.weight": "model-00026-of-00051.safetensors",
|
355 |
+
"model.layers.43.self_attn.k_proj.weight": "model-00025-of-00051.safetensors",
|
356 |
+
"model.layers.43.self_attn.o_proj.weight": "model-00025-of-00051.safetensors",
|
357 |
+
"model.layers.43.self_attn.q_proj.weight": "model-00025-of-00051.safetensors",
|
358 |
+
"model.layers.43.self_attn.v_proj.weight": "model-00025-of-00051.safetensors",
|
359 |
+
"model.layers.44.input_layernorm.weight": "model-00026-of-00051.safetensors",
|
360 |
+
"model.layers.44.mlp.down_proj.weight": "model-00026-of-00051.safetensors",
|
361 |
+
"model.layers.44.mlp.gate_proj.weight": "model-00026-of-00051.safetensors",
|
362 |
+
"model.layers.44.mlp.up_proj.weight": "model-00026-of-00051.safetensors",
|
363 |
+
"model.layers.44.post_attention_layernorm.weight": "model-00026-of-00051.safetensors",
|
364 |
+
"model.layers.44.self_attn.k_proj.weight": "model-00026-of-00051.safetensors",
|
365 |
+
"model.layers.44.self_attn.o_proj.weight": "model-00026-of-00051.safetensors",
|
366 |
+
"model.layers.44.self_attn.q_proj.weight": "model-00026-of-00051.safetensors",
|
367 |
+
"model.layers.44.self_attn.v_proj.weight": "model-00026-of-00051.safetensors",
|
368 |
+
"model.layers.45.input_layernorm.weight": "model-00027-of-00051.safetensors",
|
369 |
+
"model.layers.45.mlp.down_proj.weight": "model-00027-of-00051.safetensors",
|
370 |
+
"model.layers.45.mlp.gate_proj.weight": "model-00027-of-00051.safetensors",
|
371 |
+
"model.layers.45.mlp.up_proj.weight": "model-00027-of-00051.safetensors",
|
372 |
+
"model.layers.45.post_attention_layernorm.weight": "model-00027-of-00051.safetensors",
|
373 |
+
"model.layers.45.self_attn.k_proj.weight": "model-00026-of-00051.safetensors",
|
374 |
+
"model.layers.45.self_attn.o_proj.weight": "model-00026-of-00051.safetensors",
|
375 |
+
"model.layers.45.self_attn.q_proj.weight": "model-00026-of-00051.safetensors",
|
376 |
+
"model.layers.45.self_attn.v_proj.weight": "model-00026-of-00051.safetensors",
|
377 |
+
"model.layers.46.input_layernorm.weight": "model-00027-of-00051.safetensors",
|
378 |
+
"model.layers.46.mlp.down_proj.weight": "model-00027-of-00051.safetensors",
|
379 |
+
"model.layers.46.mlp.gate_proj.weight": "model-00027-of-00051.safetensors",
|
380 |
+
"model.layers.46.mlp.up_proj.weight": "model-00027-of-00051.safetensors",
|
381 |
+
"model.layers.46.post_attention_layernorm.weight": "model-00027-of-00051.safetensors",
|
382 |
+
"model.layers.46.self_attn.k_proj.weight": "model-00027-of-00051.safetensors",
|
383 |
+
"model.layers.46.self_attn.o_proj.weight": "model-00027-of-00051.safetensors",
|
384 |
+
"model.layers.46.self_attn.q_proj.weight": "model-00027-of-00051.safetensors",
|
385 |
+
"model.layers.46.self_attn.v_proj.weight": "model-00027-of-00051.safetensors",
|
386 |
+
"model.layers.47.input_layernorm.weight": "model-00028-of-00051.safetensors",
|
387 |
+
"model.layers.47.mlp.down_proj.weight": "model-00028-of-00051.safetensors",
|
388 |
+
"model.layers.47.mlp.gate_proj.weight": "model-00028-of-00051.safetensors",
|
389 |
+
"model.layers.47.mlp.up_proj.weight": "model-00028-of-00051.safetensors",
|
390 |
+
"model.layers.47.post_attention_layernorm.weight": "model-00028-of-00051.safetensors",
|
391 |
+
"model.layers.47.self_attn.k_proj.weight": "model-00028-of-00051.safetensors",
|
392 |
+
"model.layers.47.self_attn.o_proj.weight": "model-00028-of-00051.safetensors",
|
393 |
+
"model.layers.47.self_attn.q_proj.weight": "model-00028-of-00051.safetensors",
|
394 |
+
"model.layers.47.self_attn.v_proj.weight": "model-00028-of-00051.safetensors",
|
395 |
+
"model.layers.48.input_layernorm.weight": "model-00029-of-00051.safetensors",
|
396 |
+
"model.layers.48.mlp.down_proj.weight": "model-00029-of-00051.safetensors",
|
397 |
+
"model.layers.48.mlp.gate_proj.weight": "model-00028-of-00051.safetensors",
|
398 |
+
"model.layers.48.mlp.up_proj.weight": "model-00028-of-00051.safetensors",
|
399 |
+
"model.layers.48.post_attention_layernorm.weight": "model-00029-of-00051.safetensors",
|
400 |
+
"model.layers.48.self_attn.k_proj.weight": "model-00028-of-00051.safetensors",
|
401 |
+
"model.layers.48.self_attn.o_proj.weight": "model-00028-of-00051.safetensors",
|
402 |
+
"model.layers.48.self_attn.q_proj.weight": "model-00028-of-00051.safetensors",
|
403 |
+
"model.layers.48.self_attn.v_proj.weight": "model-00028-of-00051.safetensors",
|
404 |
+
"model.layers.49.input_layernorm.weight": "model-00029-of-00051.safetensors",
|
405 |
+
"model.layers.49.mlp.down_proj.weight": "model-00029-of-00051.safetensors",
|
406 |
+
"model.layers.49.mlp.gate_proj.weight": "model-00029-of-00051.safetensors",
|
407 |
+
"model.layers.49.mlp.up_proj.weight": "model-00029-of-00051.safetensors",
|
408 |
+
"model.layers.49.post_attention_layernorm.weight": "model-00029-of-00051.safetensors",
|
409 |
+
"model.layers.49.self_attn.k_proj.weight": "model-00029-of-00051.safetensors",
|
410 |
+
"model.layers.49.self_attn.o_proj.weight": "model-00029-of-00051.safetensors",
|
411 |
+
"model.layers.49.self_attn.q_proj.weight": "model-00029-of-00051.safetensors",
|
412 |
+
"model.layers.49.self_attn.v_proj.weight": "model-00029-of-00051.safetensors",
|
413 |
+
"model.layers.5.input_layernorm.weight": "model-00004-of-00051.safetensors",
|
414 |
+
"model.layers.5.mlp.down_proj.weight": "model-00004-of-00051.safetensors",
|
415 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00004-of-00051.safetensors",
|
416 |
+
"model.layers.5.mlp.up_proj.weight": "model-00004-of-00051.safetensors",
|
417 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00004-of-00051.safetensors",
|
418 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00004-of-00051.safetensors",
|
419 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00004-of-00051.safetensors",
|
420 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00004-of-00051.safetensors",
|
421 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00004-of-00051.safetensors",
|
422 |
+
"model.layers.50.input_layernorm.weight": "model-00030-of-00051.safetensors",
|
423 |
+
"model.layers.50.mlp.down_proj.weight": "model-00030-of-00051.safetensors",
|
424 |
+
"model.layers.50.mlp.gate_proj.weight": "model-00029-of-00051.safetensors",
|
425 |
+
"model.layers.50.mlp.up_proj.weight": "model-00030-of-00051.safetensors",
|
426 |
+
"model.layers.50.post_attention_layernorm.weight": "model-00030-of-00051.safetensors",
|
427 |
+
"model.layers.50.self_attn.k_proj.weight": "model-00029-of-00051.safetensors",
|
428 |
+
"model.layers.50.self_attn.o_proj.weight": "model-00029-of-00051.safetensors",
|
429 |
+
"model.layers.50.self_attn.q_proj.weight": "model-00029-of-00051.safetensors",
|
430 |
+
"model.layers.50.self_attn.v_proj.weight": "model-00029-of-00051.safetensors",
|
431 |
+
"model.layers.51.input_layernorm.weight": "model-00030-of-00051.safetensors",
|
432 |
+
"model.layers.51.mlp.down_proj.weight": "model-00030-of-00051.safetensors",
|
433 |
+
"model.layers.51.mlp.gate_proj.weight": "model-00030-of-00051.safetensors",
|
434 |
+
"model.layers.51.mlp.up_proj.weight": "model-00030-of-00051.safetensors",
|
435 |
+
"model.layers.51.post_attention_layernorm.weight": "model-00030-of-00051.safetensors",
|
436 |
+
"model.layers.51.self_attn.k_proj.weight": "model-00030-of-00051.safetensors",
|
437 |
+
"model.layers.51.self_attn.o_proj.weight": "model-00030-of-00051.safetensors",
|
438 |
+
"model.layers.51.self_attn.q_proj.weight": "model-00030-of-00051.safetensors",
|
439 |
+
"model.layers.51.self_attn.v_proj.weight": "model-00030-of-00051.safetensors",
|
440 |
+
"model.layers.52.input_layernorm.weight": "model-00031-of-00051.safetensors",
|
441 |
+
"model.layers.52.mlp.down_proj.weight": "model-00031-of-00051.safetensors",
|
442 |
+
"model.layers.52.mlp.gate_proj.weight": "model-00031-of-00051.safetensors",
|
443 |
+
"model.layers.52.mlp.up_proj.weight": "model-00031-of-00051.safetensors",
|
444 |
+
"model.layers.52.post_attention_layernorm.weight": "model-00031-of-00051.safetensors",
|
445 |
+
"model.layers.52.self_attn.k_proj.weight": "model-00030-of-00051.safetensors",
|
446 |
+
"model.layers.52.self_attn.o_proj.weight": "model-00030-of-00051.safetensors",
|
447 |
+
"model.layers.52.self_attn.q_proj.weight": "model-00030-of-00051.safetensors",
|
448 |
+
"model.layers.52.self_attn.v_proj.weight": "model-00030-of-00051.safetensors",
|
449 |
+
"model.layers.53.input_layernorm.weight": "model-00031-of-00051.safetensors",
|
450 |
+
"model.layers.53.mlp.down_proj.weight": "model-00031-of-00051.safetensors",
|
451 |
+
"model.layers.53.mlp.gate_proj.weight": "model-00031-of-00051.safetensors",
|
452 |
+
"model.layers.53.mlp.up_proj.weight": "model-00031-of-00051.safetensors",
|
453 |
+
"model.layers.53.post_attention_layernorm.weight": "model-00031-of-00051.safetensors",
|
454 |
+
"model.layers.53.self_attn.k_proj.weight": "model-00031-of-00051.safetensors",
|
455 |
+
"model.layers.53.self_attn.o_proj.weight": "model-00031-of-00051.safetensors",
|
456 |
+
"model.layers.53.self_attn.q_proj.weight": "model-00031-of-00051.safetensors",
|
457 |
+
"model.layers.53.self_attn.v_proj.weight": "model-00031-of-00051.safetensors",
|
458 |
+
"model.layers.54.input_layernorm.weight": "model-00032-of-00051.safetensors",
|
459 |
+
"model.layers.54.mlp.down_proj.weight": "model-00032-of-00051.safetensors",
|
460 |
+
"model.layers.54.mlp.gate_proj.weight": "model-00032-of-00051.safetensors",
|
461 |
+
"model.layers.54.mlp.up_proj.weight": "model-00032-of-00051.safetensors",
|
462 |
+
"model.layers.54.post_attention_layernorm.weight": "model-00032-of-00051.safetensors",
|
463 |
+
"model.layers.54.self_attn.k_proj.weight": "model-00032-of-00051.safetensors",
|
464 |
+
"model.layers.54.self_attn.o_proj.weight": "model-00032-of-00051.safetensors",
|
465 |
+
"model.layers.54.self_attn.q_proj.weight": "model-00032-of-00051.safetensors",
|
466 |
+
"model.layers.54.self_attn.v_proj.weight": "model-00032-of-00051.safetensors",
|
467 |
+
"model.layers.55.input_layernorm.weight": "model-00033-of-00051.safetensors",
|
468 |
+
"model.layers.55.mlp.down_proj.weight": "model-00033-of-00051.safetensors",
|
469 |
+
"model.layers.55.mlp.gate_proj.weight": "model-00032-of-00051.safetensors",
|
470 |
+
"model.layers.55.mlp.up_proj.weight": "model-00032-of-00051.safetensors",
|
471 |
+
"model.layers.55.post_attention_layernorm.weight": "model-00033-of-00051.safetensors",
|
472 |
+
"model.layers.55.self_attn.k_proj.weight": "model-00032-of-00051.safetensors",
|
473 |
+
"model.layers.55.self_attn.o_proj.weight": "model-00032-of-00051.safetensors",
|
474 |
+
"model.layers.55.self_attn.q_proj.weight": "model-00032-of-00051.safetensors",
|
475 |
+
"model.layers.55.self_attn.v_proj.weight": "model-00032-of-00051.safetensors",
|
476 |
+
"model.layers.56.input_layernorm.weight": "model-00033-of-00051.safetensors",
|
477 |
+
"model.layers.56.mlp.down_proj.weight": "model-00033-of-00051.safetensors",
|
478 |
+
"model.layers.56.mlp.gate_proj.weight": "model-00033-of-00051.safetensors",
|
479 |
+
"model.layers.56.mlp.up_proj.weight": "model-00033-of-00051.safetensors",
|
480 |
+
"model.layers.56.post_attention_layernorm.weight": "model-00033-of-00051.safetensors",
|
481 |
+
"model.layers.56.self_attn.k_proj.weight": "model-00033-of-00051.safetensors",
|
482 |
+
"model.layers.56.self_attn.o_proj.weight": "model-00033-of-00051.safetensors",
|
483 |
+
"model.layers.56.self_attn.q_proj.weight": "model-00033-of-00051.safetensors",
|
484 |
+
"model.layers.56.self_attn.v_proj.weight": "model-00033-of-00051.safetensors",
|
485 |
+
"model.layers.57.input_layernorm.weight": "model-00034-of-00051.safetensors",
|
486 |
+
"model.layers.57.mlp.down_proj.weight": "model-00034-of-00051.safetensors",
|
487 |
+
"model.layers.57.mlp.gate_proj.weight": "model-00033-of-00051.safetensors",
|
488 |
+
"model.layers.57.mlp.up_proj.weight": "model-00034-of-00051.safetensors",
|
489 |
+
"model.layers.57.post_attention_layernorm.weight": "model-00034-of-00051.safetensors",
|
490 |
+
"model.layers.57.self_attn.k_proj.weight": "model-00033-of-00051.safetensors",
|
491 |
+
"model.layers.57.self_attn.o_proj.weight": "model-00033-of-00051.safetensors",
|
492 |
+
"model.layers.57.self_attn.q_proj.weight": "model-00033-of-00051.safetensors",
|
493 |
+
"model.layers.57.self_attn.v_proj.weight": "model-00033-of-00051.safetensors",
|
494 |
+
"model.layers.58.input_layernorm.weight": "model-00034-of-00051.safetensors",
|
495 |
+
"model.layers.58.mlp.down_proj.weight": "model-00034-of-00051.safetensors",
|
496 |
+
"model.layers.58.mlp.gate_proj.weight": "model-00034-of-00051.safetensors",
|
497 |
+
"model.layers.58.mlp.up_proj.weight": "model-00034-of-00051.safetensors",
|
498 |
+
"model.layers.58.post_attention_layernorm.weight": "model-00034-of-00051.safetensors",
|
499 |
+
"model.layers.58.self_attn.k_proj.weight": "model-00034-of-00051.safetensors",
|
500 |
+
"model.layers.58.self_attn.o_proj.weight": "model-00034-of-00051.safetensors",
|
501 |
+
"model.layers.58.self_attn.q_proj.weight": "model-00034-of-00051.safetensors",
|
502 |
+
"model.layers.58.self_attn.v_proj.weight": "model-00034-of-00051.safetensors",
|
503 |
+
"model.layers.59.input_layernorm.weight": "model-00035-of-00051.safetensors",
|
504 |
+
"model.layers.59.mlp.down_proj.weight": "model-00035-of-00051.safetensors",
|
505 |
+
"model.layers.59.mlp.gate_proj.weight": "model-00035-of-00051.safetensors",
|
506 |
+
"model.layers.59.mlp.up_proj.weight": "model-00035-of-00051.safetensors",
|
507 |
+
"model.layers.59.post_attention_layernorm.weight": "model-00035-of-00051.safetensors",
|
508 |
+
"model.layers.59.self_attn.k_proj.weight": "model-00034-of-00051.safetensors",
|
509 |
+
"model.layers.59.self_attn.o_proj.weight": "model-00034-of-00051.safetensors",
|
510 |
+
"model.layers.59.self_attn.q_proj.weight": "model-00034-of-00051.safetensors",
|
511 |
+
"model.layers.59.self_attn.v_proj.weight": "model-00034-of-00051.safetensors",
|
512 |
+
"model.layers.6.input_layernorm.weight": "model-00005-of-00051.safetensors",
|
513 |
+
"model.layers.6.mlp.down_proj.weight": "model-00005-of-00051.safetensors",
|
514 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00004-of-00051.safetensors",
|
515 |
+
"model.layers.6.mlp.up_proj.weight": "model-00004-of-00051.safetensors",
|
516 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00005-of-00051.safetensors",
|
517 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00004-of-00051.safetensors",
|
518 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00004-of-00051.safetensors",
|
519 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00004-of-00051.safetensors",
|
520 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00004-of-00051.safetensors",
|
521 |
+
"model.layers.60.input_layernorm.weight": "model-00035-of-00051.safetensors",
|
522 |
+
"model.layers.60.mlp.down_proj.weight": "model-00035-of-00051.safetensors",
|
523 |
+
"model.layers.60.mlp.gate_proj.weight": "model-00035-of-00051.safetensors",
|
524 |
+
"model.layers.60.mlp.up_proj.weight": "model-00035-of-00051.safetensors",
|
525 |
+
"model.layers.60.post_attention_layernorm.weight": "model-00035-of-00051.safetensors",
|
526 |
+
"model.layers.60.self_attn.k_proj.weight": "model-00035-of-00051.safetensors",
|
527 |
+
"model.layers.60.self_attn.o_proj.weight": "model-00035-of-00051.safetensors",
|
528 |
+
"model.layers.60.self_attn.q_proj.weight": "model-00035-of-00051.safetensors",
|
529 |
+
"model.layers.60.self_attn.v_proj.weight": "model-00035-of-00051.safetensors",
|
530 |
+
"model.layers.61.input_layernorm.weight": "model-00036-of-00051.safetensors",
|
531 |
+
"model.layers.61.mlp.down_proj.weight": "model-00036-of-00051.safetensors",
|
532 |
+
"model.layers.61.mlp.gate_proj.weight": "model-00036-of-00051.safetensors",
|
533 |
+
"model.layers.61.mlp.up_proj.weight": "model-00036-of-00051.safetensors",
|
534 |
+
"model.layers.61.post_attention_layernorm.weight": "model-00036-of-00051.safetensors",
|
535 |
+
"model.layers.61.self_attn.k_proj.weight": "model-00036-of-00051.safetensors",
|
536 |
+
"model.layers.61.self_attn.o_proj.weight": "model-00036-of-00051.safetensors",
|
537 |
+
"model.layers.61.self_attn.q_proj.weight": "model-00036-of-00051.safetensors",
|
538 |
+
"model.layers.61.self_attn.v_proj.weight": "model-00036-of-00051.safetensors",
|
539 |
+
"model.layers.62.input_layernorm.weight": "model-00037-of-00051.safetensors",
|
540 |
+
"model.layers.62.mlp.down_proj.weight": "model-00037-of-00051.safetensors",
|
541 |
+
"model.layers.62.mlp.gate_proj.weight": "model-00036-of-00051.safetensors",
|
542 |
+
"model.layers.62.mlp.up_proj.weight": "model-00036-of-00051.safetensors",
|
543 |
+
"model.layers.62.post_attention_layernorm.weight": "model-00037-of-00051.safetensors",
|
544 |
+
"model.layers.62.self_attn.k_proj.weight": "model-00036-of-00051.safetensors",
|
545 |
+
"model.layers.62.self_attn.o_proj.weight": "model-00036-of-00051.safetensors",
|
546 |
+
"model.layers.62.self_attn.q_proj.weight": "model-00036-of-00051.safetensors",
|
547 |
+
"model.layers.62.self_attn.v_proj.weight": "model-00036-of-00051.safetensors",
|
548 |
+
"model.layers.63.input_layernorm.weight": "model-00037-of-00051.safetensors",
|
549 |
+
"model.layers.63.mlp.down_proj.weight": "model-00037-of-00051.safetensors",
|
550 |
+
"model.layers.63.mlp.gate_proj.weight": "model-00037-of-00051.safetensors",
|
551 |
+
"model.layers.63.mlp.up_proj.weight": "model-00037-of-00051.safetensors",
|
552 |
+
"model.layers.63.post_attention_layernorm.weight": "model-00037-of-00051.safetensors",
|
553 |
+
"model.layers.63.self_attn.k_proj.weight": "model-00037-of-00051.safetensors",
|
554 |
+
"model.layers.63.self_attn.o_proj.weight": "model-00037-of-00051.safetensors",
|
555 |
+
"model.layers.63.self_attn.q_proj.weight": "model-00037-of-00051.safetensors",
|
556 |
+
"model.layers.63.self_attn.v_proj.weight": "model-00037-of-00051.safetensors",
|
557 |
+
"model.layers.64.input_layernorm.weight": "model-00038-of-00051.safetensors",
|
558 |
+
"model.layers.64.mlp.down_proj.weight": "model-00038-of-00051.safetensors",
|
559 |
+
"model.layers.64.mlp.gate_proj.weight": "model-00037-of-00051.safetensors",
|
560 |
+
"model.layers.64.mlp.up_proj.weight": "model-00038-of-00051.safetensors",
|
561 |
+
"model.layers.64.post_attention_layernorm.weight": "model-00038-of-00051.safetensors",
|
562 |
+
"model.layers.64.self_attn.k_proj.weight": "model-00037-of-00051.safetensors",
|
563 |
+
"model.layers.64.self_attn.o_proj.weight": "model-00037-of-00051.safetensors",
|
564 |
+
"model.layers.64.self_attn.q_proj.weight": "model-00037-of-00051.safetensors",
|
565 |
+
"model.layers.64.self_attn.v_proj.weight": "model-00037-of-00051.safetensors",
|
566 |
+
"model.layers.65.input_layernorm.weight": "model-00038-of-00051.safetensors",
|
567 |
+
"model.layers.65.mlp.down_proj.weight": "model-00038-of-00051.safetensors",
|
568 |
+
"model.layers.65.mlp.gate_proj.weight": "model-00038-of-00051.safetensors",
|
569 |
+
"model.layers.65.mlp.up_proj.weight": "model-00038-of-00051.safetensors",
|
570 |
+
"model.layers.65.post_attention_layernorm.weight": "model-00038-of-00051.safetensors",
|
571 |
+
"model.layers.65.self_attn.k_proj.weight": "model-00038-of-00051.safetensors",
|
572 |
+
"model.layers.65.self_attn.o_proj.weight": "model-00038-of-00051.safetensors",
|
573 |
+
"model.layers.65.self_attn.q_proj.weight": "model-00038-of-00051.safetensors",
|
574 |
+
"model.layers.65.self_attn.v_proj.weight": "model-00038-of-00051.safetensors",
|
575 |
+
"model.layers.66.input_layernorm.weight": "model-00039-of-00051.safetensors",
|
576 |
+
"model.layers.66.mlp.down_proj.weight": "model-00039-of-00051.safetensors",
|
577 |
+
"model.layers.66.mlp.gate_proj.weight": "model-00039-of-00051.safetensors",
|
578 |
+
"model.layers.66.mlp.up_proj.weight": "model-00039-of-00051.safetensors",
|
579 |
+
"model.layers.66.post_attention_layernorm.weight": "model-00039-of-00051.safetensors",
|
580 |
+
"model.layers.66.self_attn.k_proj.weight": "model-00038-of-00051.safetensors",
|
581 |
+
"model.layers.66.self_attn.o_proj.weight": "model-00038-of-00051.safetensors",
|
582 |
+
"model.layers.66.self_attn.q_proj.weight": "model-00038-of-00051.safetensors",
|
583 |
+
"model.layers.66.self_attn.v_proj.weight": "model-00038-of-00051.safetensors",
|
584 |
+
"model.layers.67.input_layernorm.weight": "model-00039-of-00051.safetensors",
|
585 |
+
"model.layers.67.mlp.down_proj.weight": "model-00039-of-00051.safetensors",
|
586 |
+
"model.layers.67.mlp.gate_proj.weight": "model-00039-of-00051.safetensors",
|
587 |
+
"model.layers.67.mlp.up_proj.weight": "model-00039-of-00051.safetensors",
|
588 |
+
"model.layers.67.post_attention_layernorm.weight": "model-00039-of-00051.safetensors",
|
589 |
+
"model.layers.67.self_attn.k_proj.weight": "model-00039-of-00051.safetensors",
|
590 |
+
"model.layers.67.self_attn.o_proj.weight": "model-00039-of-00051.safetensors",
|
591 |
+
"model.layers.67.self_attn.q_proj.weight": "model-00039-of-00051.safetensors",
|
592 |
+
"model.layers.67.self_attn.v_proj.weight": "model-00039-of-00051.safetensors",
|
593 |
+
"model.layers.68.input_layernorm.weight": "model-00040-of-00051.safetensors",
|
594 |
+
"model.layers.68.mlp.down_proj.weight": "model-00040-of-00051.safetensors",
|
595 |
+
"model.layers.68.mlp.gate_proj.weight": "model-00040-of-00051.safetensors",
|
596 |
+
"model.layers.68.mlp.up_proj.weight": "model-00040-of-00051.safetensors",
|
597 |
+
"model.layers.68.post_attention_layernorm.weight": "model-00040-of-00051.safetensors",
|
598 |
+
"model.layers.68.self_attn.k_proj.weight": "model-00040-of-00051.safetensors",
|
599 |
+
"model.layers.68.self_attn.o_proj.weight": "model-00040-of-00051.safetensors",
|
600 |
+
"model.layers.68.self_attn.q_proj.weight": "model-00040-of-00051.safetensors",
|
601 |
+
"model.layers.68.self_attn.v_proj.weight": "model-00040-of-00051.safetensors",
|
602 |
+
"model.layers.69.input_layernorm.weight": "model-00041-of-00051.safetensors",
|
603 |
+
"model.layers.69.mlp.down_proj.weight": "model-00041-of-00051.safetensors",
|
604 |
+
"model.layers.69.mlp.gate_proj.weight": "model-00040-of-00051.safetensors",
|
605 |
+
"model.layers.69.mlp.up_proj.weight": "model-00040-of-00051.safetensors",
|
606 |
+
"model.layers.69.post_attention_layernorm.weight": "model-00041-of-00051.safetensors",
|
607 |
+
"model.layers.69.self_attn.k_proj.weight": "model-00040-of-00051.safetensors",
|
608 |
+
"model.layers.69.self_attn.o_proj.weight": "model-00040-of-00051.safetensors",
|
609 |
+
"model.layers.69.self_attn.q_proj.weight": "model-00040-of-00051.safetensors",
|
610 |
+
"model.layers.69.self_attn.v_proj.weight": "model-00040-of-00051.safetensors",
|
611 |
+
"model.layers.7.input_layernorm.weight": "model-00005-of-00051.safetensors",
|
612 |
+
"model.layers.7.mlp.down_proj.weight": "model-00005-of-00051.safetensors",
|
613 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00005-of-00051.safetensors",
|
614 |
+
"model.layers.7.mlp.up_proj.weight": "model-00005-of-00051.safetensors",
|
615 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00005-of-00051.safetensors",
|
616 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00005-of-00051.safetensors",
|
617 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00005-of-00051.safetensors",
|
618 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00005-of-00051.safetensors",
|
619 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00005-of-00051.safetensors",
|
620 |
+
"model.layers.70.input_layernorm.weight": "model-00041-of-00051.safetensors",
|
621 |
+
"model.layers.70.mlp.down_proj.weight": "model-00041-of-00051.safetensors",
|
622 |
+
"model.layers.70.mlp.gate_proj.weight": "model-00041-of-00051.safetensors",
|
623 |
+
"model.layers.70.mlp.up_proj.weight": "model-00041-of-00051.safetensors",
|
624 |
+
"model.layers.70.post_attention_layernorm.weight": "model-00041-of-00051.safetensors",
|
625 |
+
"model.layers.70.self_attn.k_proj.weight": "model-00041-of-00051.safetensors",
|
626 |
+
"model.layers.70.self_attn.o_proj.weight": "model-00041-of-00051.safetensors",
|
627 |
+
"model.layers.70.self_attn.q_proj.weight": "model-00041-of-00051.safetensors",
|
628 |
+
"model.layers.70.self_attn.v_proj.weight": "model-00041-of-00051.safetensors",
|
629 |
+
"model.layers.71.input_layernorm.weight": "model-00042-of-00051.safetensors",
|
630 |
+
"model.layers.71.mlp.down_proj.weight": "model-00042-of-00051.safetensors",
|
631 |
+
"model.layers.71.mlp.gate_proj.weight": "model-00041-of-00051.safetensors",
|
632 |
+
"model.layers.71.mlp.up_proj.weight": "model-00042-of-00051.safetensors",
|
633 |
+
"model.layers.71.post_attention_layernorm.weight": "model-00042-of-00051.safetensors",
|
634 |
+
"model.layers.71.self_attn.k_proj.weight": "model-00041-of-00051.safetensors",
|
635 |
+
"model.layers.71.self_attn.o_proj.weight": "model-00041-of-00051.safetensors",
|
636 |
+
"model.layers.71.self_attn.q_proj.weight": "model-00041-of-00051.safetensors",
|
637 |
+
"model.layers.71.self_attn.v_proj.weight": "model-00041-of-00051.safetensors",
|
638 |
+
"model.layers.72.input_layernorm.weight": "model-00042-of-00051.safetensors",
|
639 |
+
"model.layers.72.mlp.down_proj.weight": "model-00042-of-00051.safetensors",
|
640 |
+
"model.layers.72.mlp.gate_proj.weight": "model-00042-of-00051.safetensors",
|
641 |
+
"model.layers.72.mlp.up_proj.weight": "model-00042-of-00051.safetensors",
|
642 |
+
"model.layers.72.post_attention_layernorm.weight": "model-00042-of-00051.safetensors",
|
643 |
+
"model.layers.72.self_attn.k_proj.weight": "model-00042-of-00051.safetensors",
|
644 |
+
"model.layers.72.self_attn.o_proj.weight": "model-00042-of-00051.safetensors",
|
645 |
+
"model.layers.72.self_attn.q_proj.weight": "model-00042-of-00051.safetensors",
|
646 |
+
"model.layers.72.self_attn.v_proj.weight": "model-00042-of-00051.safetensors",
|
647 |
+
"model.layers.73.input_layernorm.weight": "model-00043-of-00051.safetensors",
|
648 |
+
"model.layers.73.mlp.down_proj.weight": "model-00043-of-00051.safetensors",
|
649 |
+
"model.layers.73.mlp.gate_proj.weight": "model-00043-of-00051.safetensors",
|
650 |
+
"model.layers.73.mlp.up_proj.weight": "model-00043-of-00051.safetensors",
|
651 |
+
"model.layers.73.post_attention_layernorm.weight": "model-00043-of-00051.safetensors",
|
652 |
+
"model.layers.73.self_attn.k_proj.weight": "model-00042-of-00051.safetensors",
|
653 |
+
"model.layers.73.self_attn.o_proj.weight": "model-00042-of-00051.safetensors",
|
654 |
+
"model.layers.73.self_attn.q_proj.weight": "model-00042-of-00051.safetensors",
|
655 |
+
"model.layers.73.self_attn.v_proj.weight": "model-00042-of-00051.safetensors",
|
656 |
+
"model.layers.74.input_layernorm.weight": "model-00043-of-00051.safetensors",
|
657 |
+
"model.layers.74.mlp.down_proj.weight": "model-00043-of-00051.safetensors",
|
658 |
+
"model.layers.74.mlp.gate_proj.weight": "model-00043-of-00051.safetensors",
|
659 |
+
"model.layers.74.mlp.up_proj.weight": "model-00043-of-00051.safetensors",
|
660 |
+
"model.layers.74.post_attention_layernorm.weight": "model-00043-of-00051.safetensors",
|
661 |
+
"model.layers.74.self_attn.k_proj.weight": "model-00043-of-00051.safetensors",
|
662 |
+
"model.layers.74.self_attn.o_proj.weight": "model-00043-of-00051.safetensors",
|
663 |
+
"model.layers.74.self_attn.q_proj.weight": "model-00043-of-00051.safetensors",
|
664 |
+
"model.layers.74.self_attn.v_proj.weight": "model-00043-of-00051.safetensors",
|
665 |
+
"model.layers.75.input_layernorm.weight": "model-00044-of-00051.safetensors",
|
666 |
+
"model.layers.75.mlp.down_proj.weight": "model-00044-of-00051.safetensors",
|
667 |
+
"model.layers.75.mlp.gate_proj.weight": "model-00044-of-00051.safetensors",
|
668 |
+
"model.layers.75.mlp.up_proj.weight": "model-00044-of-00051.safetensors",
|
669 |
+
"model.layers.75.post_attention_layernorm.weight": "model-00044-of-00051.safetensors",
|
670 |
+
"model.layers.75.self_attn.k_proj.weight": "model-00044-of-00051.safetensors",
|
671 |
+
"model.layers.75.self_attn.o_proj.weight": "model-00044-of-00051.safetensors",
|
672 |
+
"model.layers.75.self_attn.q_proj.weight": "model-00044-of-00051.safetensors",
|
673 |
+
"model.layers.75.self_attn.v_proj.weight": "model-00044-of-00051.safetensors",
|
674 |
+
"model.layers.76.input_layernorm.weight": "model-00045-of-00051.safetensors",
|
675 |
+
"model.layers.76.mlp.down_proj.weight": "model-00045-of-00051.safetensors",
|
676 |
+
"model.layers.76.mlp.gate_proj.weight": "model-00044-of-00051.safetensors",
|
677 |
+
"model.layers.76.mlp.up_proj.weight": "model-00044-of-00051.safetensors",
|
678 |
+
"model.layers.76.post_attention_layernorm.weight": "model-00045-of-00051.safetensors",
|
679 |
+
"model.layers.76.self_attn.k_proj.weight": "model-00044-of-00051.safetensors",
|
680 |
+
"model.layers.76.self_attn.o_proj.weight": "model-00044-of-00051.safetensors",
|
681 |
+
"model.layers.76.self_attn.q_proj.weight": "model-00044-of-00051.safetensors",
|
682 |
+
"model.layers.76.self_attn.v_proj.weight": "model-00044-of-00051.safetensors",
|
683 |
+
"model.layers.77.input_layernorm.weight": "model-00045-of-00051.safetensors",
|
684 |
+
"model.layers.77.mlp.down_proj.weight": "model-00045-of-00051.safetensors",
|
685 |
+
"model.layers.77.mlp.gate_proj.weight": "model-00045-of-00051.safetensors",
|
686 |
+
"model.layers.77.mlp.up_proj.weight": "model-00045-of-00051.safetensors",
|
687 |
+
"model.layers.77.post_attention_layernorm.weight": "model-00045-of-00051.safetensors",
|
688 |
+
"model.layers.77.self_attn.k_proj.weight": "model-00045-of-00051.safetensors",
|
689 |
+
"model.layers.77.self_attn.o_proj.weight": "model-00045-of-00051.safetensors",
|
690 |
+
"model.layers.77.self_attn.q_proj.weight": "model-00045-of-00051.safetensors",
|
691 |
+
"model.layers.77.self_attn.v_proj.weight": "model-00045-of-00051.safetensors",
|
692 |
+
"model.layers.78.input_layernorm.weight": "model-00046-of-00051.safetensors",
|
693 |
+
"model.layers.78.mlp.down_proj.weight": "model-00046-of-00051.safetensors",
|
694 |
+
"model.layers.78.mlp.gate_proj.weight": "model-00045-of-00051.safetensors",
|
695 |
+
"model.layers.78.mlp.up_proj.weight": "model-00046-of-00051.safetensors",
|
696 |
+
"model.layers.78.post_attention_layernorm.weight": "model-00046-of-00051.safetensors",
|
697 |
+
"model.layers.78.self_attn.k_proj.weight": "model-00045-of-00051.safetensors",
|
698 |
+
"model.layers.78.self_attn.o_proj.weight": "model-00045-of-00051.safetensors",
|
699 |
+
"model.layers.78.self_attn.q_proj.weight": "model-00045-of-00051.safetensors",
|
700 |
+
"model.layers.78.self_attn.v_proj.weight": "model-00045-of-00051.safetensors",
|
701 |
+
"model.layers.79.input_layernorm.weight": "model-00046-of-00051.safetensors",
|
702 |
+
"model.layers.79.mlp.down_proj.weight": "model-00046-of-00051.safetensors",
|
703 |
+
"model.layers.79.mlp.gate_proj.weight": "model-00046-of-00051.safetensors",
|
704 |
+
"model.layers.79.mlp.up_proj.weight": "model-00046-of-00051.safetensors",
|
705 |
+
"model.layers.79.post_attention_layernorm.weight": "model-00046-of-00051.safetensors",
|
706 |
+
"model.layers.79.self_attn.k_proj.weight": "model-00046-of-00051.safetensors",
|
707 |
+
"model.layers.79.self_attn.o_proj.weight": "model-00046-of-00051.safetensors",
|
708 |
+
"model.layers.79.self_attn.q_proj.weight": "model-00046-of-00051.safetensors",
|
709 |
+
"model.layers.79.self_attn.v_proj.weight": "model-00046-of-00051.safetensors",
|
710 |
+
"model.layers.8.input_layernorm.weight": "model-00006-of-00051.safetensors",
|
711 |
+
"model.layers.8.mlp.down_proj.weight": "model-00006-of-00051.safetensors",
|
712 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00005-of-00051.safetensors",
|
713 |
+
"model.layers.8.mlp.up_proj.weight": "model-00006-of-00051.safetensors",
|
714 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00006-of-00051.safetensors",
|
715 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00005-of-00051.safetensors",
|
716 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00005-of-00051.safetensors",
|
717 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00005-of-00051.safetensors",
|
718 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00005-of-00051.safetensors",
|
719 |
+
"model.layers.80.input_layernorm.weight": "model-00047-of-00051.safetensors",
|
720 |
+
"model.layers.80.mlp.down_proj.weight": "model-00047-of-00051.safetensors",
|
721 |
+
"model.layers.80.mlp.gate_proj.weight": "model-00047-of-00051.safetensors",
|
722 |
+
"model.layers.80.mlp.up_proj.weight": "model-00047-of-00051.safetensors",
|
723 |
+
"model.layers.80.post_attention_layernorm.weight": "model-00047-of-00051.safetensors",
|
724 |
+
"model.layers.80.self_attn.k_proj.weight": "model-00046-of-00051.safetensors",
|
725 |
+
"model.layers.80.self_attn.o_proj.weight": "model-00046-of-00051.safetensors",
|
726 |
+
"model.layers.80.self_attn.q_proj.weight": "model-00046-of-00051.safetensors",
|
727 |
+
"model.layers.80.self_attn.v_proj.weight": "model-00046-of-00051.safetensors",
|
728 |
+
"model.layers.81.input_layernorm.weight": "model-00047-of-00051.safetensors",
|
729 |
+
"model.layers.81.mlp.down_proj.weight": "model-00047-of-00051.safetensors",
|
730 |
+
"model.layers.81.mlp.gate_proj.weight": "model-00047-of-00051.safetensors",
|
731 |
+
"model.layers.81.mlp.up_proj.weight": "model-00047-of-00051.safetensors",
|
732 |
+
"model.layers.81.post_attention_layernorm.weight": "model-00047-of-00051.safetensors",
|
733 |
+
"model.layers.81.self_attn.k_proj.weight": "model-00047-of-00051.safetensors",
|
734 |
+
"model.layers.81.self_attn.o_proj.weight": "model-00047-of-00051.safetensors",
|
735 |
+
"model.layers.81.self_attn.q_proj.weight": "model-00047-of-00051.safetensors",
|
736 |
+
"model.layers.81.self_attn.v_proj.weight": "model-00047-of-00051.safetensors",
|
737 |
+
"model.layers.82.input_layernorm.weight": "model-00048-of-00051.safetensors",
|
738 |
+
"model.layers.82.mlp.down_proj.weight": "model-00048-of-00051.safetensors",
|
739 |
+
"model.layers.82.mlp.gate_proj.weight": "model-00048-of-00051.safetensors",
|
740 |
+
"model.layers.82.mlp.up_proj.weight": "model-00048-of-00051.safetensors",
|
741 |
+
"model.layers.82.post_attention_layernorm.weight": "model-00048-of-00051.safetensors",
|
742 |
+
"model.layers.82.self_attn.k_proj.weight": "model-00048-of-00051.safetensors",
|
743 |
+
"model.layers.82.self_attn.o_proj.weight": "model-00048-of-00051.safetensors",
|
744 |
+
"model.layers.82.self_attn.q_proj.weight": "model-00048-of-00051.safetensors",
|
745 |
+
"model.layers.82.self_attn.v_proj.weight": "model-00048-of-00051.safetensors",
|
746 |
+
"model.layers.83.input_layernorm.weight": "model-00049-of-00051.safetensors",
|
747 |
+
"model.layers.83.mlp.down_proj.weight": "model-00049-of-00051.safetensors",
|
748 |
+
"model.layers.83.mlp.gate_proj.weight": "model-00048-of-00051.safetensors",
|
749 |
+
"model.layers.83.mlp.up_proj.weight": "model-00048-of-00051.safetensors",
|
750 |
+
"model.layers.83.post_attention_layernorm.weight": "model-00049-of-00051.safetensors",
|
751 |
+
"model.layers.83.self_attn.k_proj.weight": "model-00048-of-00051.safetensors",
|
752 |
+
"model.layers.83.self_attn.o_proj.weight": "model-00048-of-00051.safetensors",
|
753 |
+
"model.layers.83.self_attn.q_proj.weight": "model-00048-of-00051.safetensors",
|
754 |
+
"model.layers.83.self_attn.v_proj.weight": "model-00048-of-00051.safetensors",
|
755 |
+
"model.layers.84.input_layernorm.weight": "model-00049-of-00051.safetensors",
|
756 |
+
"model.layers.84.mlp.down_proj.weight": "model-00049-of-00051.safetensors",
|
757 |
+
"model.layers.84.mlp.gate_proj.weight": "model-00049-of-00051.safetensors",
|
758 |
+
"model.layers.84.mlp.up_proj.weight": "model-00049-of-00051.safetensors",
|
759 |
+
"model.layers.84.post_attention_layernorm.weight": "model-00049-of-00051.safetensors",
|
760 |
+
"model.layers.84.self_attn.k_proj.weight": "model-00049-of-00051.safetensors",
|
761 |
+
"model.layers.84.self_attn.o_proj.weight": "model-00049-of-00051.safetensors",
|
762 |
+
"model.layers.84.self_attn.q_proj.weight": "model-00049-of-00051.safetensors",
|
763 |
+
"model.layers.84.self_attn.v_proj.weight": "model-00049-of-00051.safetensors",
|
764 |
+
"model.layers.85.input_layernorm.weight": "model-00050-of-00051.safetensors",
|
765 |
+
"model.layers.85.mlp.down_proj.weight": "model-00050-of-00051.safetensors",
|
766 |
+
"model.layers.85.mlp.gate_proj.weight": "model-00049-of-00051.safetensors",
|
767 |
+
"model.layers.85.mlp.up_proj.weight": "model-00050-of-00051.safetensors",
|
768 |
+
"model.layers.85.post_attention_layernorm.weight": "model-00050-of-00051.safetensors",
|
769 |
+
"model.layers.85.self_attn.k_proj.weight": "model-00049-of-00051.safetensors",
|
770 |
+
"model.layers.85.self_attn.o_proj.weight": "model-00049-of-00051.safetensors",
|
771 |
+
"model.layers.85.self_attn.q_proj.weight": "model-00049-of-00051.safetensors",
|
772 |
+
"model.layers.85.self_attn.v_proj.weight": "model-00049-of-00051.safetensors",
|
773 |
+
"model.layers.86.input_layernorm.weight": "model-00050-of-00051.safetensors",
|
774 |
+
"model.layers.86.mlp.down_proj.weight": "model-00050-of-00051.safetensors",
|
775 |
+
"model.layers.86.mlp.gate_proj.weight": "model-00050-of-00051.safetensors",
|
776 |
+
"model.layers.86.mlp.up_proj.weight": "model-00050-of-00051.safetensors",
|
777 |
+
"model.layers.86.post_attention_layernorm.weight": "model-00050-of-00051.safetensors",
|
778 |
+
"model.layers.86.self_attn.k_proj.weight": "model-00050-of-00051.safetensors",
|
779 |
+
"model.layers.86.self_attn.o_proj.weight": "model-00050-of-00051.safetensors",
|
780 |
+
"model.layers.86.self_attn.q_proj.weight": "model-00050-of-00051.safetensors",
|
781 |
+
"model.layers.86.self_attn.v_proj.weight": "model-00050-of-00051.safetensors",
|
782 |
+
"model.layers.87.input_layernorm.weight": "model-00051-of-00051.safetensors",
|
783 |
+
"model.layers.87.mlp.down_proj.weight": "model-00051-of-00051.safetensors",
|
784 |
+
"model.layers.87.mlp.gate_proj.weight": "model-00051-of-00051.safetensors",
|
785 |
+
"model.layers.87.mlp.up_proj.weight": "model-00051-of-00051.safetensors",
|
786 |
+
"model.layers.87.post_attention_layernorm.weight": "model-00051-of-00051.safetensors",
|
787 |
+
"model.layers.87.self_attn.k_proj.weight": "model-00050-of-00051.safetensors",
|
788 |
+
"model.layers.87.self_attn.o_proj.weight": "model-00050-of-00051.safetensors",
|
789 |
+
"model.layers.87.self_attn.q_proj.weight": "model-00050-of-00051.safetensors",
|
790 |
+
"model.layers.87.self_attn.v_proj.weight": "model-00050-of-00051.safetensors",
|
791 |
+
"model.layers.9.input_layernorm.weight": "model-00006-of-00051.safetensors",
|
792 |
+
"model.layers.9.mlp.down_proj.weight": "model-00006-of-00051.safetensors",
|
793 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00006-of-00051.safetensors",
|
794 |
+
"model.layers.9.mlp.up_proj.weight": "model-00006-of-00051.safetensors",
|
795 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00006-of-00051.safetensors",
|
796 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00006-of-00051.safetensors",
|
797 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00006-of-00051.safetensors",
|
798 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00006-of-00051.safetensors",
|
799 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00006-of-00051.safetensors",
|
800 |
+
"model.norm.weight": "model-00051-of-00051.safetensors"
|
801 |
+
}
|
802 |
+
}
|
output-00001-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3402e5e5d65193742cd4583e43f0f626921c77c50bca74ccce99ceabd4eb107
|
3 |
+
size 8512296998
|
output-00002-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2dd1f49af02a96d17b6eabd8752f841e736b7af5c82f7e0b169cb8adcef78bbd
|
3 |
+
size 8532780168
|
output-00003-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db6fe117aa33b563ed52db06fe7c2fec365812b8ee36485e465a2046856f89e2
|
3 |
+
size 8497460202
|
output-00004-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:589cb6476df56263c295a59022baa3fb39a26572811225a799e33e8c2c0e6b41
|
3 |
+
size 8561325818
|
output-00005-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2fd3cda37f52c327f288fb0562f520d672bd84ee3d976916df4001b6db6ddba
|
3 |
+
size 5082888500
|
params.json
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dim": 12288,
|
3 |
+
"n_layers": 88,
|
4 |
+
"head_dim": 128,
|
5 |
+
"hidden_dim": 28672,
|
6 |
+
"n_heads": 96,
|
7 |
+
"n_kv_heads": 8,
|
8 |
+
"norm_eps": 1e-05,
|
9 |
+
"vocab_size": 32768,
|
10 |
+
"rope_theta": 1000000.0
|
11 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"unk_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b968b8dc352f42192367337c78ccc61e1eaddc6d641a579372d4f20694beb7a
|
3 |
+
size 587562
|
tokenizer.model.v7
ADDED
Binary file (588 kB). View file
|
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|