Upload LlamaForCausalLM

Files changed:
- README.md +160 -169
- config.json +2 -1
- generation_config.json +3 -3
- model-00001-of-00017.safetensors +3 -0
- model-00002-of-00017.safetensors +3 -0
- model-00003-of-00017.safetensors +3 -0
- model-00004-of-00017.safetensors +3 -0
- model-00005-of-00017.safetensors +3 -0
- model-00006-of-00017.safetensors +3 -0
- model-00007-of-00017.safetensors +3 -0
- model-00008-of-00017.safetensors +3 -0
- model-00009-of-00017.safetensors +3 -0
- model-00010-of-00017.safetensors +3 -0
- model-00011-of-00017.safetensors +3 -0
- model-00012-of-00017.safetensors +3 -0
- model-00013-of-00017.safetensors +3 -0
- model-00014-of-00017.safetensors +3 -0
- model-00015-of-00017.safetensors +3 -0
- model-00016-of-00017.safetensors +3 -0
- model-00017-of-00017.safetensors +3 -0
- model.safetensors.index.json +291 -291
README.md CHANGED
@@ -1,189 +1,180 @@
 ---
 language:
 - en
-(not captured in the extraction)
+license: other
 tags:
 - facebook
 - meta
 - pytorch
 - llama
 - llama-3
-(not captured in the extraction)
+- llama-factory
+pipeline_tag: text-generation
 license_name: llama3
 license_link: LICENSE
-extra_gated_prompt:
-(old lines 15-163: the previous license and policy text, not captured in the extraction apart from the closing items below)
-    2. Generating, promoting, or furthering defamatory content, including the creation of defamatory statements, images, or other content
-    3. Generating, promoting, or further distributing spam
-    4. Impersonating another individual without consent, authorization, or legal right
-    5. Representing that the use of Meta Llama 3 or outputs are human-generated
-    6. Generating or facilitating false online engagement, including fake reviews and other means of fake online engagement
-4. Fail to appropriately disclose to end users any known dangers of your AI system
-Please report any violation of this Policy, software “bug,” or other problems that could lead to a violation
-of this Policy through one of the following means:
-    * Reporting issues with the model: [https://github.com/meta-llama/llama3](https://github.com/meta-llama/llama3)
-    * Reporting risky content generated by the model:
-    developers.facebook.com/llama_output_feedback
-    * Reporting bugs and security concerns: facebook.com/whitehat/info
-    * Reporting violations of the Acceptable Use Policy or unlicensed uses of Meta Llama 3: LlamaUseReport@meta.com
+extra_gated_prompt: "### META LLAMA 3 COMMUNITY LICENSE AGREEMENT\nMeta Llama 3 Version\
+  \ Release Date: April 18, 2024\n\"Agreement\" means the terms and conditions for\
+  \ use, reproduction, distribution and modification of the Llama Materials set forth\
+  \ herein.\n\"Documentation\" means the specifications, manuals and documentation\
+  \ accompanying Meta Llama 3 distributed by Meta at https://llama.meta.com/get-started/.\n\
+  \"Licensee\" or \"you\" means you, or your employer or any other person or entity\
+  \ (if you are entering into this Agreement on such person or entity’s behalf), of\
+  \ the age required under applicable laws, rules or regulations to provide legal\
+  \ consent and that has legal authority to bind your employer or such other person\
+  \ or entity if you are entering in this Agreement on their behalf.\n\"Meta Llama\
+  \ 3\" means the foundational large language models and software and algorithms,\
+  \ including machine-learning model code, trained model weights, inference-enabling\
+  \ code, training-enabling code, fine-tuning enabling code and other elements of\
+  \ the foregoing distributed by Meta at https://llama.meta.com/llama-downloads.\n\
+  \"Llama Materials\" means, collectively, Meta’s proprietary Meta Llama 3 and Documentation\
+  \ (and any portion thereof) made available under this Agreement.\n\"Meta\" or \"\
+  we\" means Meta Platforms Ireland Limited (if you are located in or, if you are\
+  \ an entity, your principal place of business is in the EEA or Switzerland) and\
+  \ Meta Platforms, Inc. (if you are located outside of the EEA or Switzerland).\n\
+  \ \n1. License Rights and Redistribution.\na. Grant of Rights. You are granted\
+  \ a non-exclusive, worldwide, non-transferable and royalty-free limited license\
+  \ under Meta’s intellectual property or other rights owned by Meta embodied in the\
+  \ Llama Materials to use, reproduce, distribute, copy, create derivative works of,\
+  \ and make modifications to the Llama Materials.\nb. Redistribution and Use.\ni.\
+  \ If you distribute or make available the Llama Materials (or any derivative works\
+  \ thereof), or a product or service that uses any of them, including another AI\
+  \ model, you shall (A) provide a copy of this Agreement with any such Llama Materials;\
+  \ and (B) prominently display “Built with Meta Llama 3” on a related website, user\
+  \ interface, blogpost, about page, or product documentation. If you use the Llama\
+  \ Materials to create, train, fine tune, or otherwise improve an AI model, which\
+  \ is distributed or made available, you shall also include “Llama 3” at the beginning\
+  \ of any such AI model name.\nii. If you receive Llama Materials, or any derivative\
+  \ works thereof, from a Licensee as part of an integrated end user product, then\
+  \ Section 2 of this Agreement will not apply to you.\niii. You must retain in all\
+  \ copies of the Llama Materials that you distribute the following attribution notice\
+  \ within a “Notice” text file distributed as a part of such copies: “Meta Llama\
+  \ 3 is licensed under the Meta Llama 3 Community License, Copyright © Meta Platforms,\
+  \ Inc. All Rights Reserved.”\niv. Your use of the Llama Materials must comply with\
+  \ applicable laws and regulations (including trade compliance laws and regulations)\
+  \ and adhere to the Acceptable Use Policy for the Llama Materials (available at\
+  \ https://llama.meta.com/llama3/use-policy), which is hereby incorporated by reference\
+  \ into this Agreement.\nv. You will not use the Llama Materials or any output or\
+  \ results of the Llama Materials to improve any other large language model (excluding\
+  \ Meta Llama 3 or derivative works thereof).\n2. Additional Commercial Terms. If,\
+  \ on the Meta Llama 3 version release date, the monthly active users of the products\
+  \ or services made available by or for Licensee, or Licensee’s affiliates, is greater\
+  \ than 700 million monthly active users in the preceding calendar month, you must\
+  \ request a license from Meta, which Meta may grant to you in its sole discretion,\
+  \ and you are not authorized to exercise any of the rights under this Agreement\
+  \ unless or until Meta otherwise expressly grants you such rights.\n3. Disclaimer\
+  \ of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE LLAMA MATERIALS AND ANY OUTPUT\
+  \ AND RESULTS THEREFROM ARE PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OF\
+  \ ANY KIND, AND META DISCLAIMS ALL WARRANTIES OF ANY KIND, BOTH EXPRESS AND IMPLIED,\
+  \ INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY,\
+  \ OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING\
+  \ THE APPROPRIATENESS OF USING OR REDISTRIBUTING THE LLAMA MATERIALS AND ASSUME\
+  \ ANY RISKS ASSOCIATED WITH YOUR USE OF THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS.\n\
+  4. Limitation of Liability. IN NO EVENT WILL META OR ITS AFFILIATES BE LIABLE UNDER\
+  \ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY,\
+  \ OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT,\
+  \ SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF META\
+  \ OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.\n\
+  5. Intellectual Property.\na. No trademark licenses are granted under this Agreement,\
+  \ and in connection with the Llama Materials, neither Meta nor Licensee may use\
+  \ any name or mark owned by or associated with the other or any of its affiliates,\
+  \ except as required for reasonable and customary use in describing and redistributing\
+  \ the Llama Materials or as set forth in this Section 5(a). Meta hereby grants you\
+  \ a license to use “Llama 3” (the “Mark”) solely as required to comply with the\
+  \ last sentence of Section 1.b.i. You will comply with Meta’s brand guidelines (currently\
+  \ accessible at https://about.meta.com/brand/resources/meta/company-brand/ ). All\
+  \ goodwill arising out of your use of the Mark will inure to the benefit of Meta.\n\
+  b. Subject to Meta’s ownership of Llama Materials and derivatives made by or for\
+  \ Meta, with respect to any derivative works and modifications of the Llama Materials\
+  \ that are made by you, as between you and Meta, you are and will be the owner of\
+  \ such derivative works and modifications.\nc. If you institute litigation or other\
+  \ proceedings against Meta or any entity (including a cross-claim or counterclaim\
+  \ in a lawsuit) alleging that the Llama Materials or Meta Llama 3 outputs or results,\
+  \ or any portion of any of the foregoing, constitutes infringement of intellectual\
+  \ property or other rights owned or licensable by you, then any licenses granted\
+  \ to you under this Agreement shall terminate as of the date such litigation or\
+  \ claim is filed or instituted. You will indemnify and hold harmless Meta from and\
+  \ against any claim by any third party arising out of or related to your use or\
+  \ distribution of the Llama Materials.\n6. Term and Termination. The term of this\
+  \ Agreement will commence upon your acceptance of this Agreement or access to the\
+  \ Llama Materials and will continue in full force and effect until terminated in\
+  \ accordance with the terms and conditions herein. Meta may terminate this Agreement\
+  \ if you are in breach of any term or condition of this Agreement. Upon termination\
+  \ of this Agreement, you shall delete and cease use of the Llama Materials. Sections\
+  \ 3, 4 and 7 shall survive the termination of this Agreement.\n7. Governing Law\
+  \ and Jurisdiction. This Agreement will be governed and construed under the laws\
+  \ of the State of California without regard to choice of law principles, and the\
+  \ UN Convention on Contracts for the International Sale of Goods does not apply\
+  \ to this Agreement. The courts of California shall have exclusive jurisdiction\
+  \ of any dispute arising out of this Agreement.\n### Meta Llama 3 Acceptable Use\
+  \ Policy\nMeta is committed to promoting safe and fair use of its tools and features,\
+  \ including Meta Llama 3. If you access or use Meta Llama 3, you agree to this Acceptable\
+  \ Use Policy (“Policy”). The most recent copy of this policy can be found at [https://llama.meta.com/llama3/use-policy](https://llama.meta.com/llama3/use-policy)\n\
+  #### Prohibited Uses\nWe want everyone to use Meta Llama 3 safely and responsibly.\
+  \ You agree you will not use, or allow others to use, Meta Llama 3 to: 1. Violate\
+  \ the law or others’ rights, including to:\n 1. Engage in, promote, generate,\
+  \ contribute to, encourage, plan, incite, or further illegal or unlawful activity\
+  \ or content, such as:\n 1. Violence or terrorism\n 2. Exploitation\
+  \ or harm to children, including the solicitation, creation, acquisition, or dissemination\
+  \ of child exploitative content or failure to report Child Sexual Abuse Material\n\
+  \ 3. Human trafficking, exploitation, and sexual violence\n 4. The\
+  \ illegal distribution of information or materials to minors, including obscene\
+  \ materials, or failure to employ legally required age-gating in connection with\
+  \ such information or materials.\n 5. Sexual solicitation\n 6. Any\
+  \ other criminal activity\n 2. Engage in, promote, incite, or facilitate the\
+  \ harassment, abuse, threatening, or bullying of individuals or groups of individuals\n\
+  \ 3. Engage in, promote, incite, or facilitate discrimination or other unlawful\
+  \ or harmful conduct in the provision of employment, employment benefits, credit,\
+  \ housing, other economic benefits, or other essential goods and services\n 4.\
+  \ Engage in the unauthorized or unlicensed practice of any profession including,\
+  \ but not limited to, financial, legal, medical/health, or related professional\
+  \ practices\n 5. Collect, process, disclose, generate, or infer health, demographic,\
+  \ or other sensitive personal or private information about individuals without rights\
+  \ and consents required by applicable laws\n 6. Engage in or facilitate any action\
+  \ or generate any content that infringes, misappropriates, or otherwise violates\
+  \ any third-party rights, including the outputs or results of any products or services\
+  \ using the Llama Materials\n 7. Create, generate, or facilitate the creation\
+  \ of malicious code, malware, computer viruses or do anything else that could disable,\
+  \ overburden, interfere with or impair the proper working, integrity, operation\
+  \ or appearance of a website or computer system\n2. Engage in, promote, incite,\
+  \ facilitate, or assist in the planning or development of activities that present\
+  \ a risk of death or bodily harm to individuals, including use of Meta Llama 3 related\
+  \ to the following:\n 1. Military, warfare, nuclear industries or applications,\
+  \ espionage, use for materials or activities that are subject to the International\
+  \ Traffic Arms Regulations (ITAR) maintained by the United States Department of\
+  \ State\n 2. Guns and illegal weapons (including weapon development)\n 3.\
+  \ Illegal drugs and regulated/controlled substances\n 4. Operation of critical\
+  \ infrastructure, transportation technologies, or heavy machinery\n 5. Self-harm\
+  \ or harm to others, including suicide, cutting, and eating disorders\n 6. Any\
+  \ content intended to incite or promote violence, abuse, or any infliction of bodily\
+  \ harm to an individual\n3. Intentionally deceive or mislead others, including use\
+  \ of Meta Llama 3 related to the following:\n 1. Generating, promoting, or furthering\
+  \ fraud or the creation or promotion of disinformation\n 2. Generating, promoting,\
+  \ or furthering defamatory content, including the creation of defamatory statements,\
+  \ images, or other content\n 3. Generating, promoting, or further distributing\
+  \ spam\n 4. Impersonating another individual without consent, authorization,\
+  \ or legal right\n 5. Representing that the use of Meta Llama 3 or outputs are\
+  \ human-generated\n 6. Generating or facilitating false online engagement, including\
+  \ fake reviews and other means of fake online engagement\n4. Fail to appropriately\
+  \ disclose to end users any known dangers of your AI system\nPlease report any violation\
+  \ of this Policy, software “bug,” or other problems that could lead to a violation\
+  \ of this Policy through one of the following means:\n * Reporting issues with\
+  \ the model: [https://github.com/meta-llama/llama3](https://github.com/meta-llama/llama3)\n\
+  \ * Reporting risky content generated by the model:\n developers.facebook.com/llama_output_feedback\n\
+  \ * Reporting bugs and security concerns: facebook.com/whitehat/info\n * Reporting\
+  \ violations of the Acceptable Use Policy or unlicensed uses of Meta Llama 3: LlamaUseReport@meta.com"
 extra_gated_fields:
   First Name: text
   Last Name: text
   Date of birth: date_picker
   Country: country
   Affiliation: text
-  geo: ip_location
-  By clicking Submit below I accept the terms of the license and acknowledge that
-  (not captured in the extraction)
+  geo: ip_location
+  ? By clicking Submit below I accept the terms of the license and acknowledge that
+    the information I provide will be collected stored processed and shared in accordance
+    with the Meta Privacy Policy
+  : checkbox
+extra_gated_description: The information you provide will be collected, stored, processed
+  and shared in accordance with the [Meta Privacy Policy](https://www.facebook.com/privacy/policy/).
 extra_gated_button_content: Submit
 ---
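The new front matter gates downloads behind the license prompt above and tags the repo for the text-generation pipeline. A minimal loading sketch follows; the repo id is an assumption taken from the `_name_or_path` recorded in config.json below (it may not be this repository's actual id), and the gate must first be accepted with your Hugging Face account.

```python
# Minimal sketch, not canonical usage: the repo id is assumed from
# "_name_or_path" in config.json and a token with gate access is required.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "arml/ifit-aic-nasm-tailor"  # hypothetical repo id
tokenizer = AutoTokenizer.from_pretrained(repo_id, token=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in config.json
    device_map="auto",           # needs accelerate; the 17 shards are resolved automatically
    token=True,
)
inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))
```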
config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "arml/ifit-aic-nasm-tailor",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -21,7 +22,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.40.
+  "transformers_version": "4.40.1",
   "use_cache": true,
   "vocab_size": 128256
 }
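The config change is metadata only: it records the fine-tuning source path in `_name_or_path` and pins the exact transformers version. A quick sanity check against the unchanged architecture fields, reusing the hypothetical repo id from above:

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("arml/ifit-aic-nasm-tailor")  # hypothetical repo id
assert cfg.architectures == ["LlamaForCausalLM"]
assert cfg.rope_theta == 500000.0  # Llama 3 RoPE base
assert cfg.vocab_size == 128256    # consistent with the 128000/128001 special token ids
```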
generation_config.json CHANGED
@@ -1,9 +1,9 @@
 {
   "bos_token_id": 128000,
-  "eos_token_id": 128001,
   "do_sample": true,
-  "
+  "eos_token_id": 128001,
   "max_length": 4096,
+  "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.40.
+  "transformers_version": "4.40.1"
 }
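The rewrite sorts the keys, pins transformers 4.40.1, and adds an explicit `temperature`. These become the repo's default sampling settings; `generate()` picks them up automatically when loading from the repo, and a sketch reconstructing them locally looks like this:

```python
from transformers import GenerationConfig

# Reconstruction of the committed generation_config.json, for illustration only.
gen_cfg = GenerationConfig(
    bos_token_id=128000,
    eos_token_id=128001,
    do_sample=True,
    max_length=4096,
    temperature=0.6,
    top_p=0.9,
)
print(gen_cfg)
```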
model-00001-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ee9957505da85664bcb0baa942d43fb9dff18ed637ec169f36c545eef0659f0
+size 1050673296

model-00002-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80fc4f7e7987b45deca6ed2c1cb03c7f4ca3feb47db5096cb6b05245fa617664
+size 956336616

model-00003-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4678502ca79b4c61b73bfd68cd67999ea07df5c24fa30dec5eb382f20256003
+size 989890696

model-00004-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8498cf1a042084ad1f3a2c42b4092ba2f743f43fe2287c2bdf846ca6d1294b39
+size 989890696

model-00005-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6113331a4203d90393bfefeacc37a43767117ef48c2821772921a65691ceac88
+size 989907312

model-00006-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3553b1f6876ea05e9d28e41ec678553b69f45191bc02444b006c2f6e14ae36db
+size 956336624

model-00007-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ceb479c43b95b36f04f2114aee031e25087f1cb8528b3944f0e757c142ababe3
+size 989890720

model-00008-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54c22c15f9da2032f0c9c99c3ad98b007a9cbdae8f907550876522b579ca9a4b
+size 989890712

model-00009-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c36647d6c1542e2467f61ba065fa293f949f09c7f14d33f5ef378c1734281f7
+size 989907328

model-00010-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:310c6a94371869d319b5b2ff10eb74e27d8343a447446a96ab3e35ba570c1be2
+size 956336632

model-00011-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a11780fec9579dfbe427f861f495cee9e3742114a0282a875a9551da4891761
+size 989890720

model-00012-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4821ba5c4321ff1dec86e64b3a615975f6a28556af0a7c91dc2974303b38345f
+size 989890712

model-00013-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4eeca03589c2f4864bf5dacf57f1ceac773a8ef6d603a7d9dbfc2a805787e1d5
+size 989907328

model-00014-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98fc607c20817e8cfcf05267e7e7fc523283780c6abd234019a1d32158591f24
+size 956336632

model-00015-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:979715d55faf2210c594f08de64f5eba468a47dff09337aa776e9394c1c1d118
+size 989890720

model-00016-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7127c9420b743238bef2514e19e97ee7fb82c8e09b3ec2680ca0bdb2322bad6
+size 234906168

model-00017-of-00017.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c5394fe848f6eafef2a44f085e7b72e5ef0d315f98aa87b3827c2b1e6983b89
+size 1050673280
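Each shard is stored through Git LFS, so the repository itself only holds a three-line pointer: the LFS spec version, the SHA-256 of the real file (`oid`), and its byte size. A sketch for verifying a downloaded shard against its pointer, assuming the file sits in the current directory:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash a file in 1 MiB chunks; the shards are ~1 GB, so avoid reading them whole."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

# Example check against the first pointer above.
expected = "5ee9957505da85664bcb0baa942d43fb9dff18ed637ec169f36c545eef0659f0"
assert sha256_of("model-00001-of-00017.safetensors") == expected
```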
model.safetensors.index.json CHANGED
@@ -3,296 +3,296 @@
     "total_size": 16060522496
   },
   "weight_map": {
-    (the 291 previous weight_map entries covered the same tensor names; the old shard filenames were truncated by the page extraction and are not recoverable)
+    "lm_head.weight": "model-00017-of-00017.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00017.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00007-of-00017.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00007-of-00017.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00006-of-00017.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00008-of-00017.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00008-of-00017.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00007-of-00017.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00009-of-00017.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00009-of-00017.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00008-of-00017.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00009-of-00017.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00003-of-00017.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00003-of-00017.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00017.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00003-of-00017.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00017.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00017.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00011-of-00017.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00011-of-00017.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00010-of-00017.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00012-of-00017.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00012-of-00017.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00011-of-00017.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00013-of-00017.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00013-of-00017.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00012-of-00017.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00013-of-00017.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00015-of-00017.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00015-of-00017.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00015-of-00017.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00015-of-00017.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00015-of-00017.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00014-of-00017.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00003-of-00017.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00003-of-00017.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00003-of-00017.safetensors",
|
218 |
+
"model.layers.3.mlp.up_proj.weight": "model-00003-of-00017.safetensors",
|
219 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00003-of-00017.safetensors",
|
220 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00003-of-00017.safetensors",
|
221 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00003-of-00017.safetensors",
|
222 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00003-of-00017.safetensors",
|
223 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00003-of-00017.safetensors",
|
224 |
+
"model.layers.30.input_layernorm.weight": "model-00015-of-00017.safetensors",
|
225 |
+
"model.layers.30.mlp.down_proj.weight": "model-00015-of-00017.safetensors",
|
226 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00015-of-00017.safetensors",
|
227 |
+
"model.layers.30.mlp.up_proj.weight": "model-00015-of-00017.safetensors",
|
228 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00015-of-00017.safetensors",
|
229 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00015-of-00017.safetensors",
|
230 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00015-of-00017.safetensors",
|
231 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00015-of-00017.safetensors",
|
232 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00015-of-00017.safetensors",
|
233 |
+
"model.layers.31.input_layernorm.weight": "model-00016-of-00017.safetensors",
|
234 |
+
"model.layers.31.mlp.down_proj.weight": "model-00016-of-00017.safetensors",
|
235 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00015-of-00017.safetensors",
|
236 |
+
"model.layers.31.mlp.up_proj.weight": "model-00016-of-00017.safetensors",
|
237 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00016-of-00017.safetensors",
|
238 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00015-of-00017.safetensors",
|
239 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00015-of-00017.safetensors",
|
240 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00015-of-00017.safetensors",
|
241 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00015-of-00017.safetensors",
|
242 |
+
"model.layers.4.input_layernorm.weight": "model-00004-of-00017.safetensors",
|
243 |
+
"model.layers.4.mlp.down_proj.weight": "model-00004-of-00017.safetensors",
|
244 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00003-of-00017.safetensors",
|
245 |
+
"model.layers.4.mlp.up_proj.weight": "model-00004-of-00017.safetensors",
|
246 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00004-of-00017.safetensors",
|
247 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00003-of-00017.safetensors",
|
248 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00003-of-00017.safetensors",
|
249 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00003-of-00017.safetensors",
|
250 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00003-of-00017.safetensors",
|
251 |
+
"model.layers.5.input_layernorm.weight": "model-00004-of-00017.safetensors",
|
252 |
+
"model.layers.5.mlp.down_proj.weight": "model-00004-of-00017.safetensors",
|
253 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00004-of-00017.safetensors",
|
254 |
+
"model.layers.5.mlp.up_proj.weight": "model-00004-of-00017.safetensors",
|
255 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00004-of-00017.safetensors",
|
256 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00004-of-00017.safetensors",
|
257 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00004-of-00017.safetensors",
|
258 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00004-of-00017.safetensors",
|
259 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00004-of-00017.safetensors",
|
260 |
+
"model.layers.6.input_layernorm.weight": "model-00005-of-00017.safetensors",
|
261 |
+
"model.layers.6.mlp.down_proj.weight": "model-00005-of-00017.safetensors",
|
262 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00004-of-00017.safetensors",
|
263 |
+
"model.layers.6.mlp.up_proj.weight": "model-00004-of-00017.safetensors",
|
264 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00005-of-00017.safetensors",
|
265 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00004-of-00017.safetensors",
|
266 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00004-of-00017.safetensors",
|
267 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00004-of-00017.safetensors",
|
268 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00004-of-00017.safetensors",
|
269 |
+
"model.layers.7.input_layernorm.weight": "model-00005-of-00017.safetensors",
|
270 |
+
"model.layers.7.mlp.down_proj.weight": "model-00005-of-00017.safetensors",
|
271 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00005-of-00017.safetensors",
|
272 |
+
"model.layers.7.mlp.up_proj.weight": "model-00005-of-00017.safetensors",
|
273 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00005-of-00017.safetensors",
|
274 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00005-of-00017.safetensors",
|
275 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00005-of-00017.safetensors",
|
276 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00005-of-00017.safetensors",
|
277 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00005-of-00017.safetensors",
|
278 |
+
"model.layers.8.input_layernorm.weight": "model-00005-of-00017.safetensors",
|
279 |
+
"model.layers.8.mlp.down_proj.weight": "model-00005-of-00017.safetensors",
|
280 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00005-of-00017.safetensors",
|
281 |
+
"model.layers.8.mlp.up_proj.weight": "model-00005-of-00017.safetensors",
|
282 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00005-of-00017.safetensors",
|
283 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00005-of-00017.safetensors",
|
284 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00005-of-00017.safetensors",
|
285 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00005-of-00017.safetensors",
|
286 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00005-of-00017.safetensors",
|
287 |
+
"model.layers.9.input_layernorm.weight": "model-00006-of-00017.safetensors",
|
288 |
+
"model.layers.9.mlp.down_proj.weight": "model-00006-of-00017.safetensors",
|
289 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00006-of-00017.safetensors",
|
290 |
+
"model.layers.9.mlp.up_proj.weight": "model-00006-of-00017.safetensors",
|
291 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00006-of-00017.safetensors",
|
292 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00006-of-00017.safetensors",
|
293 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00006-of-00017.safetensors",
|
294 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00006-of-00017.safetensors",
|
295 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00006-of-00017.safetensors",
|
296 |
+
"model.norm.weight": "model-00016-of-00017.safetensors"
|
297 |
}
|
298 |
}
|
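For context, the `weight_map` above is the standard Hugging Face sharded-checkpoint index: it maps each parameter name to the shard file that stores it, so a loader only has to open the shards it actually needs. Below is a minimal sketch of resolving one tensor through the index, assuming the files from this commit sit in the working directory; it illustrates the mechanism rather than the exact `transformers` internals.

```python
# Minimal sketch: look up a tensor's shard in model.safetensors.index.json
# and read only that tensor from disk. File and tensor names match this
# repository's layout; the loading code itself is illustrative.
import json

from safetensors import safe_open  # pip install safetensors

# "weight_map" maps each parameter name to the shard file holding it.
with open("model.safetensors.index.json") as f:
    index = json.load(f)
weight_map = index["weight_map"]

# Resolve a single tensor to its shard and read just that tensor.
name = "model.layers.21.mlp.up_proj.weight"
shard = weight_map[name]  # -> "model-00011-of-00017.safetensors"
with safe_open(shard, framework="pt") as shard_file:
    tensor = shard_file.get_tensor(name)
print(name, tuple(tensor.shape), "from", shard)
```

In normal use none of this is done by hand: `transformers`' `from_pretrained` reads this index and dispatches each parameter to the correct shard automatically.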