diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7f86e34fdf6c027bec1403db4e6ce389eeaa9c4f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,529 @@
+---
+language:
+- en
+- fr
+- de
+- es
+- it
+- pt
+- zh
+- ja
+- ru
+- ko
+license: other
+license_name: mrl
+inference: false
+license_link: https://mistral.ai/licenses/MRL-0.1.md
+extra_gated_prompt: '# Mistral AI Research License
+
+ If You want to use a Mistral Model, a Derivative or an Output for any purpose that
+ is not expressly authorized under this Agreement, You must request a license from
+ Mistral AI, which Mistral AI may grant to You in Mistral AI''s sole discretion.
+ To discuss such a license, please contact Mistral AI via the website contact form:
+ https://mistral.ai/contact/
+
+ ## 1. Scope and acceptance
+
+ **1.1. Scope of the Agreement.** This Agreement applies to any use, modification,
+ or Distribution of any Mistral Model by You, regardless of the source You obtained
+ a copy of such Mistral Model.
+
+ **1.2. Acceptance.** By accessing, using, modifying, Distributing a Mistral Model,
+ or by creating, using or distributing a Derivative of the Mistral Model, You agree
+ to be bound by this Agreement.
+
+ **1.3. Acceptance on behalf of a third-party.** If You accept this Agreement on
+ behalf of Your employer or another person or entity, You warrant and represent that
+ You have the authority to act and accept this Agreement on their behalf. In such
+ a case, the word "You" in this Agreement will refer to Your employer or such other
+ person or entity.
+
+ ## 2. License
+
+ **2.1. Grant of rights**. Subject to Section 3 below, Mistral AI hereby grants
+ You a non-exclusive, royalty-free, worldwide, non-sublicensable, non-transferable,
+ limited license to use, copy, modify, and Distribute under the conditions provided
+ in Section 2.2 below, the Mistral Model and any Derivatives made by or for Mistral
+ AI and to create Derivatives of the Mistral Model.
+
+ **2.2. Distribution of Mistral Model and Derivatives made by or for Mistral AI.**
+ Subject to Section 3 below, You may Distribute copies of the Mistral Model and/or
+ Derivatives made by or for Mistral AI, under the following conditions: You must
+ make available a copy of this Agreement to third-party recipients of the Mistral
+ Models and/or Derivatives made by or for Mistral AI you Distribute, it being specified
+ that any rights to use the Mistral Models and/or Derivatives made by or for Mistral
+ AI shall be directly granted by Mistral AI to said third-party recipients pursuant
+ to the Mistral AI Research License agreement executed between these parties; You
+ must retain in all copies of the Mistral Models the following attribution notice
+ within a "Notice" text file distributed as part of such copies: "Licensed by Mistral
+ AI under the Mistral AI Research License".
+
+ **2.3. Distribution of Derivatives made by or for You.** Subject to Section 3 below,
+ You may Distribute any Derivatives made by or for You under additional or different
+ terms and conditions, provided that: In any event, the use and modification of Mistral
+ Model and/or Derivatives made by or for Mistral AI shall remain governed by the
+ terms and conditions of this Agreement; You include in any such Derivatives made
+ by or for You prominent notices stating that You modified the concerned Mistral
+ Model; and Any terms and conditions You impose on any third-party recipients relating
+ to Derivatives made by or for You shall neither limit such third-party recipients''
+ use of the Mistral Model or any Derivatives made by or for Mistral AI in accordance
+ with the Mistral AI Research License nor conflict with any of its terms and conditions.
+
+ ## 3. Limitations
+
+ **3.1. Misrepresentation.** You must not misrepresent or imply, through any means,
+ that the Derivatives made by or for You and/or any modified version of the Mistral
+ Model You Distribute under your name and responsibility is an official product of
+ Mistral AI or has been endorsed, approved or validated by Mistral AI, unless You
+ are authorized by Us to do so in writing.
+
+ **3.2. Usage Limitation.** You shall only use the Mistral Models, Derivatives (whether
+ or not created by Mistral AI) and Outputs for Research Purposes.
+
+ ## 4. Intellectual Property
+
+ **4.1. Trademarks.** No trademark licenses are granted under this Agreement, and
+ in connection with the Mistral Models, You may not use any name or mark owned by
+ or associated with Mistral AI or any of its affiliates, except (i) as required for
+ reasonable and customary use in describing and Distributing the Mistral Models and
+ Derivatives made by or for Mistral AI and (ii) for attribution purposes as required
+ by this Agreement.
+
+ **4.2. Outputs.** We claim no ownership rights in and to the Outputs. You are solely
+ responsible for the Outputs You generate and their subsequent uses in accordance
+ with this Agreement. Any Outputs shall be subject to the restrictions set out in
+ Section 3 of this Agreement.
+
+ **4.3. Derivatives.** By entering into this Agreement, You accept that any Derivatives
+ that You may create or that may be created for You shall be subject to the restrictions
+ set out in Section 3 of this Agreement.
+
+ ## 5. Liability
+
+ **5.1. Limitation of liability.** In no event, unless required by applicable law
+ (such as deliberate and grossly negligent acts) or agreed to in writing, shall Mistral
+ AI be liable to You for damages, including any direct, indirect, special, incidental,
+ or consequential damages of any character arising as a result of this Agreement
+ or out of the use or inability to use the Mistral Models and Derivatives (including
+ but not limited to damages for loss of data, loss of goodwill, loss of expected
+ profit or savings, work stoppage, computer failure or malfunction, or any damage
+ caused by malware or security breaches), even if Mistral AI has been advised of
+ the possibility of such damages.
+
+ **5.2. Indemnification.** You agree to indemnify and hold harmless Mistral AI from
+ and against any claims, damages, or losses arising out of or related to Your use
+ or Distribution of the Mistral Models and Derivatives.
+
+ ## 6. Warranty
+
+ **6.1. Disclaimer.** Unless required by applicable law or prior agreed to by Mistral
+ AI in writing, Mistral AI provides the Mistral Models and Derivatives on an "AS
+ IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
+ including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT,
+ MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. Mistral AI does not represent
+ nor warrant that the Mistral Models and Derivatives will be error-free, meet Your
+ or any third party''s requirements, be secure or will allow You or any third party
+ to achieve any kind of result or generate any kind of content. You are solely responsible
+ for determining the appropriateness of using or Distributing the Mistral Models
+ and Derivatives and assume any risks associated with Your exercise of rights under
+ this Agreement.
+
+ ## 7. Termination
+
+ **7.1. Term.** This Agreement is effective as of the date of your acceptance of
+ this Agreement or access to the concerned Mistral Models or Derivatives and will
+ continue until terminated in accordance with the following terms.
+
+ **7.2. Termination.** Mistral AI may terminate this Agreement at any time if You
+ are in breach of this Agreement. Upon termination of this Agreement, You must cease
+ to use all Mistral Models and Derivatives and shall permanently delete any copy
+ thereof. The following provisions, in their relevant parts, will survive any termination
+ or expiration of this Agreement, each for the duration necessary to achieve its
+ own intended purpose (e.g. the liability provision will survive until the end of
+ the applicable limitation period):Sections 5 (Liability), 6(Warranty), 7 (Termination)
+ and 8 (General Provisions).
+
+ **7.3. Litigation.** If You initiate any legal action or proceedings against Us
+ or any other entity (including a cross-claim or counterclaim in a lawsuit), alleging
+ that the Model or a Derivative, or any part thereof, infringe upon intellectual
+ property or other rights owned or licensable by You, then any licenses granted to
+ You under this Agreement will immediately terminate as of the date such legal action
+ or claim is filed or initiated.
+
+ ## 8. General provisions
+
+ **8.1. Governing laws.** This Agreement will be governed by the laws of France,
+ without regard to choice of law principles, and the UN Convention on Contracts for
+ the International Sale of Goods does not apply to this Agreement.
+
+ **8.2. Competent jurisdiction.** The courts of Paris shall have exclusive jurisdiction
+ of any dispute arising out of this Agreement.
+
+ **8.3. Severability.** If any provision of this Agreement is held to be invalid,
+ illegal or unenforceable, the remaining provisions shall be unaffected thereby and
+ remain valid as if such provision had not been set forth herein.
+
+ ## 9. Definitions
+
+ "Agreement": means this Mistral AI Research License agreement governing the access,
+ use, and Distribution of the Mistral Models, Derivatives and Outputs.
+
+ "Derivative": means any (i) modified version of the Mistral Model (including but
+ not limited to any customized or fine-tuned version thereof), (ii) work based on
+ the Mistral Model, or (iii) any other derivative work thereof.
+
+ "Distribution", "Distributing", "Distribute" or "Distributed": means supplying,
+ providing or making available, by any means, a copy of the Mistral Models and/or
+ the Derivatives as the case may be, subject to Section 3 of this Agreement.
+
+ "Mistral AI", "We" or "Us": means Mistral AI, a French société par actions simplifiée
+ registered in the Paris commercial registry under the number 952 418 325, and having
+ its registered seat at 15, rue des Halles, 75001 Paris.
+
+ "Mistral Model": means the foundational large language model(s), and its elements
+ which include algorithms, software, instructed checkpoints, parameters, source code
+ (inference code, evaluation code and, if applicable, fine-tuning code) and any other
+ elements associated thereto made available by Mistral AI under this Agreement, including,
+ if any, the technical documentation, manuals and instructions for the use and operation
+ thereof.
+
+ "Research Purposes": means any use of a Mistral Model, Derivative, or Output that
+ is solely for (a) personal, scientific or academic research, and (b) for non-profit
+ and non-commercial purposes, and not directly or indirectly connected to any commercial
+ activities or business operations. For illustration purposes, Research Purposes
+ does not include (1) any usage of the Mistral Model, Derivative or Output by individuals
+ or contractors employed in or engaged by companies in the context of (a) their daily
+ tasks, or (b) any activity (including but not limited to any testing or proof-of-concept)
+ that is intended to generate revenue, nor (2) any Distribution by a commercial entity
+ of the Mistral Model, Derivative or Output whether in return for payment or free
+ of charge, in any medium or form, including but not limited to through a hosted
+ or managed service (e.g. SaaS, cloud instances, etc.), or behind a software layer.
+
+ "Outputs": means any content generated by the operation of the Mistral Models or
+ the Derivatives from a prompt (i.e., text instructions) provided by users. For
+ the avoidance of doubt, Outputs do not include any components of a Mistral Models,
+ such as any fine-tuned versions of the Mistral Models, the weights, or parameters.
+
+ "You": means the individual or entity entering into this Agreement with Mistral
+ AI.
+
+
+ *Mistral AI processes your personal data below to provide the model and enforce
+ its license. If you are affiliated with a commercial entity, we may also send you
+ communications about our models. For more information on your rights and data handling,
+ please see our privacy policy.*'
+extra_gated_fields:
+ First Name: text
+ Last Name: text
+ Country: country
+ Affiliation: text
+ Job title: text
+ I understand that I can only use the model, any derivative versions and their outputs for non-commercial research purposes: checkbox
+ ? I understand that if I am a commercial entity, I am not permitted to use or distribute
+ the model internally or externally, or expose it in my own offerings without a
+ commercial license
+ : checkbox
+ ? I understand that if I upload the model, or any derivative version, on any platform,
+ I must include the Mistral Research License
+ : checkbox
+ ? I understand that for commercial use of the model, I can contact Mistral or use
+ the Mistral AI API on la Plateforme or any of our cloud provider partners
+ : checkbox
+ ? By clicking Submit below I accept the terms of the license and acknowledge that
+ the information I provide will be collected stored processed and shared in accordance
+ with the Mistral Privacy Policy
+ : checkbox
+ geo: ip_location
+extra_gated_description: Mistral AI processes your personal data below to provide
+ the model and enforce its license. If you are affiliated with a commercial entity,
+ we may also send you communications about our models. For more information on your
+ rights and data handling, please see our privacy
+ policy.
+extra_gated_button_content: Submit
+library_name: vllm
+---
+
+# Model Card for Pixtral-Large-Instruct-2411
+
+Pixtral-Large-Instruct-2411 is a 124B multimodal model built on top of Mistral Large 2, i.e., [Mistral-Large-Instruct-2407](https://huggingface.co/mistralai/Mistral-Large-Instruct-2407). Pixtral Large is the second model in our multimodal family and demonstrates frontier-level image understanding. Particularly, the model is able to understand documents, charts and natural images, while maintaining the leading text-only understanding of Mistral Large 2.
+
+For more details about this model please refer to the [Pixtral Large blog post](https://mistral.ai/news/pixtral-large/) and the [Pixtral 12B blog post](https://mistral.ai/news/pixtral-12b/).
+
+## Key features
+- Frontier-class multimodal performance
+- State-of-the-art on MathVista, DocVQA, VQAv2
+- Extends Mistral Large 2 without compromising text performance
+- 123B multimodal decoder, 1B parameter vision encoder
+- 128K context window: fits a minimum of 30 high-resolution images
+
+
+
+### System Prompt Handling
+
+We appreciate the feedback received from our community regarding our system prompt handling.
+In response, we have implemented stronger support for system prompts.
+To achieve optimal results, we recommend always including a system prompt that clearly outlines the bot's purpose, even if it is minimal.
+
+### Basic Instruct Template (V7)
+
+```
+<s>[SYSTEM_PROMPT] <system prompt>[/SYSTEM_PROMPT][INST] <user message>[/INST] <assistant response></s>[INST] <user message>[/INST]
+```
+
+**Be careful with subtle missing or trailing white spaces!**
+
+*Please make sure to use [mistral-common](https://github.com/mistralai/mistral-common) as the source of truth*
+
+
+## Metrics
+
+| Model | MathVista (CoT) | MMMU (CoT) | ChartQA (CoT) | DocVQA (ANLS) | VQAv2 (VQA Match) | AI2D (BBox) | MM MT-Bench |
+|:----------------------------:|:---------------:|:----------:|:-------------:|:--------------:|:-----------------:|:-----------:|:-----------:|
+| **Pixtral Large (124B)** | **69.4** | 64.0 | 88.1 | **93.3**| **80.9** | 93.8 | **7.4**|
+| Gemini-1.5 Pro (measured) | 67.8 | 66.3 | 83.8 | 92.3 | 70.6 | **94.6**| 6.8 |
+| GPT-4o (measured) | 65.4 | **68.6**| 85.2 | 88.5 | 76.4 | 93.2 | 6.7 |
+| Claude-3.5 Sonnet (measured) | 67.1 | 68.4 | **89.1**| 88.6 | 69.5 | 76.9 | 7.3 |
+| Llama-3.2 90B (measured) | 49.1 | 53.7 | 70.8 | 85.7 | 67.0 | - | 5.5 |
+
+Specific model versions evaluated: Claude-3.5 Sonnet (new) [Oct 24], Gemini-1.5 Pro (002) [Sep 24], GPT-4o (2024-08-06) [Aug 24].
+
+See [mistral-evals](https://github.com/mistralai/mistral-evals) for open-source MM MT-Bench evaluation scripts.
+
+## Usage
+
+The model can be used with the following frameworks:
+
+- [`vllm`](https://github.com/vllm-project/vllm): See [here](#vllm)
+
+### vLLM
+
+We recommend using Pixtral-Large-Instruct-2411 with the [vLLM library](https://github.com/vllm-project/vllm)
+to implement production-ready inference pipelines with Pixtral-Large-Instruct-2411.
+
+**_Installation_**
+
+Make sure you install [`vLLM >= v0.6.4.post1`](https://github.com/vllm-project/vllm/releases/tag/v0.6.4.post1):
+
+```
+pip install --upgrade vllm
+```
+
+Also make sure you have [`mistral_common >= 1.5.0`](https://github.com/mistralai/mistral-common/releases/tag/v1.5.0) installed:
+
+```
+pip install --upgrade mistral_common
+```
+
+You can also use a ready-to-go Docker image, either built from the [Dockerfile](https://github.com/vllm-project/vllm/blob/main/Dockerfile) or pulled from [Docker Hub](https://hub.docker.com/layers/vllm/vllm-openai/latest/images/sha256-55a88146a4da0b6e193431b5b1d3492dfd7bebdc16919df4d031273e85a6157c?context=explore).
+
+
+#### Server (Image)
+We recommend using Pixtral-Large-Instruct-2411 in a server/client setting.
+
+1. Spin up a server:
+
+```
+vllm serve mistralai/Pixtral-Large-Instruct-2411 --tokenizer_mode mistral --limit_mm_per_prompt 'image=10' --tensor-parallel-size 8
+```
+
+2. Then query the server from a client:
+
+```py
+import requests
+import json
+from huggingface_hub import hf_hub_download
+from datetime import datetime, timedelta
+
+url = "http://:8000/v1/chat/completions"
+headers = {"Content-Type": "application/json", "Authorization": "Bearer token"}
+
+model = "mistralai/Pixtral-Large-Instruct-2411"
+
+
+def load_system_prompt(repo_id: str, filename: str) -> str:
+ file_path = hf_hub_download(repo_id=repo_id, filename=filename)
+ with open(file_path, "r") as file:
+ system_prompt = file.read()
+ today = datetime.today().strftime("%Y-%m-%d")
+ yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
+ model_name = repo_id.split("/")[-1]
+ return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
+
+
+SYSTEM_PROMPT = load_system_prompt(model, "SYSTEM_PROMPT.txt")
+
+image_url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/europe.png"
+
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Which of the depicted countries has the best food? Which the second and third and fourth? Name the country, its color on the map and one its city that is visible on the map, but is not the capital. Make absolutely sure to only name a city that can be seen on the map.",
+ },
+ {"type": "image_url", "image_url": {"url": image_url}},
+ ],
+ },
+]
+
+data = {"model": model, "messages": messages}
+
+response = requests.post(url, headers=headers, data=json.dumps(data))
+print(response.json()["choices"][0]["message"]["content"])
+# Determining which country has the "best" food can be subjective and depends on personal preferences. However, based on popular culinary reputations, here are some countries known for their cuisine:
+
+#1. **Italy** (Brown) - Known for its pasta, pizza, and diverse regional dishes.
+# - City: Milan
+
+#2. **France** (Dark Brown) - Renowned for its fine dining, pastries, and wine.
+# - City: Lyon
+
+#3. **Spain** (Yellow) - Famous for tapas, paella, and a variety of seafood dishes.
+# - City: Barcelona
+
+#4. **Greece** (Yellow) - Known for its Mediterranean cuisine, including moussaka, souvlaki, and fresh seafood.
+# - City: Thessaloniki
+
+#These rankings are based on general culinary reputations and can vary widely depending on individual tastes.
+```
+
+#### Server (Text-only)
+
+You can also query the server with a text-only example. The following example
+shows how the system prompt can be used to make sure the model always knows
+the current date.
+
+```py
+import requests
+import json
+from huggingface_hub import hf_hub_download
+from datetime import datetime, timedelta
+
+url = "http://:8000/v1/chat/completions"
+headers = {"Content-Type": "application/json", "Authorization": "Bearer token"}
+
+model = "mistralai/Pixtral-Large-Instruct-2411"
+
+
+def load_system_prompt(repo_id: str, filename: str) -> str:
+ file_path = hf_hub_download(repo_id=repo_id, filename=filename)
+ with open(file_path, "r") as file:
+ system_prompt = file.read()
+ today = datetime.today().strftime("%Y-%m-%d")
+ yesterday = (datetime.today() - timedelta(days=1)).strftime("%Y-%m-%d")
+ model_name = repo_id.split("/")[-1]
+ return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
+
+
+SYSTEM_PROMPT = load_system_prompt(model, "SYSTEM_PROMPT.txt")
+
+image_url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/europe.png"
+
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": "Without browsing the web, how many days ago was Mistral founded?"
+ },
+]
+
+data = {"model": model, "messages": messages}
+
+response = requests.post(url, headers=headers, data=json.dumps(data))
+print(response.json()["choices"][0]["message"]["content"])
+# Mistral AI was founded in April 2023. Since the current date is November 18, 2024, we can calculate the number of days between April 2023 and November 18, 2024.
+
+#First, calculate the days from April 2023 to the end of 2023:
+#- April: 27 days (30 - 3)
+#- May: 31 days
+#- June: 30 days
+#- July: 31 days
+#- August: 31 days
+#- September: 30 days
+#- October: 31 days
+#- November: 30 days
+#- December: 31 days
+
+#Total days from April 2023 to December 31, 2023: 27 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31 = 272 days
+
+#Next, calculate the days from January 1, 2024, to November 18, 2024:
+#- January: 31 days
+#- February: 29 days (2024 is a leap year)
+#- March: 31 days
+#- April: 30 days
+#- May: 31 days
+#- June: 30 days
+#- July: 31 days
+#- August: 31 days
+#- September: 30 days
+#- October: 31 days
+#- November: 18 days
+
+#Total days from January 1, 2024, to November 18, 2024: 31 + 29 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 18 = 323 days
+
+#Adding the two periods together:
+#272 days (from April 2023 to December 2023) + 323 days (from January 2024 to November 18, 2024) = 595 days
+
+#Therefore, Mistral AI was founded 595 days ago from November 18, 2024.
+```
+
+#### Offline Example
+```py
+from vllm import LLM
+from vllm.sampling_params import SamplingParams
+from huggingface_hub import hf_hub_download
+from datetime import datetime, timedelta
+
+model_name = "mistralai/Pixtral-Large-Instruct-2411"
+
+def load_system_prompt(repo_id: str, filename: str) -> str:
+ file_path = hf_hub_download(repo_id=repo_id, filename=filename)
+ with open(file_path, 'r') as file:
+ system_prompt = file.read()
+ today = datetime.today().strftime('%Y-%m-%d')
+ yesterday = (datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d')
+ model_name = repo_id.split("/")[-1]
+ return system_prompt.format(name=model_name, today=today, yesterday=yesterday)
+
+SYSTEM_PROMPT = load_system_prompt(model_name, "SYSTEM_PROMPT.txt")
+
+image_url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/europe.png"
+
+messages = [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "Which of the depicted countries has the best food? Which the second and third and fourth? Name the country, its color on the map and one its city that is visible on the map, but is not the capital. Make absolutely sure to only name a city that can be seen on the map.",
+ },
+ {"type": "image_url", "image_url": {"url": image_url}},
+ ],
+ },
+]
+
+sampling_params = SamplingParams(max_tokens=512)
+
+# note that running this model on GPU requires over 300 GB of GPU RAM
+llm = LLM(model=model_name, tokenizer_mode="mistral", tensor_parallel_size=8, limit_mm_per_prompt={"image": 4})
+
+outputs = llm.chat(messages, sampling_params=sampling_params)
+
+print(outputs[0].outputs[0].text)
+```
+
+## The Mistral AI Team
+
+Albert Jiang, Alexandre Sablayrolles, Alexis Tacnet, Alok Kothari, Antoine Roux, Arthur Mensch, Audrey Herblin-Stoop, Augustin Garreau, Austin Birky, Bam4d, Baptiste Bout, Baudouin de Monicault, Blanche Savary, Carole Rambaud, Caroline Feldman, Devendra Singh Chaplot, Diego de las Casas, Diogo Costa, Eleonore Arcelin, Emma Bou Hanna, Etienne Metzger, Gaspard Blanchet, Gianna Lengyel, Guillaume Bour, Guillaume Lample, Harizo Rajaona, Henri Roussez, Hichem Sattouf, Ian Mack, Jean-Malo Delignon, Jessica Chudnovsky, Justus Murke, Kartik Khandelwal, Lawrence Stewart, Louis Martin, Louis Ternon, Lucile Saulnier, Lélio Renard Lavaud, Margaret Jennings, Marie Pellat, Marie Torelli, Marie-Anne Lachaux, Marjorie Janiewicz, Mickaël Seznec, Nicolas Schuhl, Niklas Muhs, Olivier de Garrigues, Patrick von Platen, Paul Jacob, Pauline Buche, Pavan Kumar Reddy, Perry Savas, Pierre Stock, Romain Sauvestre, Sagar Vaze, Sandeep Subramanian, Saurabh Garg, Sophia Yang, Szymon Antoniak, Teven Le Scao, Thibault Schueller, Thibaut Lavril, Thomas Wang, Théophile Gervet, Timothée Lacroix, Valera Nemychnikova, Wendy Shang, William El Sayed, William Marshall
\ No newline at end of file
diff --git a/SYSTEM_PROMPT.txt b/SYSTEM_PROMPT.txt
new file mode 100644
index 0000000000000000000000000000000000000000..01a49e4bd65e55ef51e6f8c2c6bbfbef45c01e87
--- /dev/null
+++ b/SYSTEM_PROMPT.txt
@@ -0,0 +1,19 @@
+You are {name}, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.
+You power an AI assistant called Le Chat.
+Your knowledge base was last updated on 2023-10-01.
+The current date is {today}.
+
+When you're not sure about some information, you say that you don't have the information and don't make up anything.
+If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. "What are some good restaurants around me?" => "Where are you?" or "When is the next flight to Tokyo" => "Where do you travel from?").
+You are always very attentive to dates, in particular you try to resolve dates (e.g. "yesterday" is {yesterday}) and when asked about information at specific dates, you discard information that is at another date.
+You follow these instructions in all languages, and always respond to the user in the language they use or request.
+Next sections describe the capabilities that you have.
+
+# WEB BROWSING INSTRUCTIONS
+
+You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat.
+
+# MULTI-MODAL INSTRUCTIONS
+
+You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos.
+You cannot read nor transcribe audio files or videos.
\ No newline at end of file
diff --git a/consolidated-00001-of-00052.safetensors b/consolidated-00001-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fbcda787e03e655de5dbfdc5673c3324ed37214c
--- /dev/null
+++ b/consolidated-00001-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960b40a266d24363123c3da233a48f43013e5fd666973fdfaa3e3425a02853dd
+size 4831913664
diff --git a/consolidated-00002-of-00052.safetensors b/consolidated-00002-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..97891dc26e7a8ee2bccddb5b5dde3bf5691a427b
--- /dev/null
+++ b/consolidated-00002-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e6486140081f6f6f324e25b0b0b405dd93de1cb0c5127c74d99a080f1307809
+size 4831938360
diff --git a/consolidated-00003-of-00052.safetensors b/consolidated-00003-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..db911b3897d9ffdbf3c1af174c4f2861c7daa71c
--- /dev/null
+++ b/consolidated-00003-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d4b3bb55594b34ec72336eb14c45dfb5fbde691b0491d91eb69894e8c439ba7
+size 4831938368
diff --git a/consolidated-00004-of-00052.safetensors b/consolidated-00004-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a66a772deec318f6f581df3262561668e9d3ed66
--- /dev/null
+++ b/consolidated-00004-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff7ad44da5056d8d5858409c428ac411e27e0447a1a6f39610bd032e8d72d199
+size 4907410944
diff --git a/consolidated-00005-of-00052.safetensors b/consolidated-00005-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ca19316b6b7d99ea7248fa379a083794a62d8983
--- /dev/null
+++ b/consolidated-00005-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1528db29b165a8f99677f7fbe4953bffd210edf301cffcf4080043d6bfbaf61f
+size 4806747752
diff --git a/consolidated-00006-of-00052.safetensors b/consolidated-00006-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..273dc9781461bf940cd90a1bfab2814b0c1fcdb7
--- /dev/null
+++ b/consolidated-00006-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f5d6dbeb6c092f3d542f34ce795aabe53c87d95b2a1af64fdd6ca1d0614f075
+size 4831938360
diff --git a/consolidated-00007-of-00052.safetensors b/consolidated-00007-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2802804160b1089991ccf4fe7e691c9a53b7493c
--- /dev/null
+++ b/consolidated-00007-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4ce23f911b5ddc03ee7c93c72975f2342c484c56e2bef57d66615082049f8ff
+size 4831938360
diff --git a/consolidated-00008-of-00052.safetensors b/consolidated-00008-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9659a301ae16ee79cd5bf1eec2eb783e905a29ae
--- /dev/null
+++ b/consolidated-00008-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1687172a3b0201c1ef4ceca7bc3247ad27e8f33640643b6ed8596684ca20e7e8
+size 4907410936
diff --git a/consolidated-00009-of-00052.safetensors b/consolidated-00009-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5bca9649214671c0445e4a4c6eb64cacf6107886
--- /dev/null
+++ b/consolidated-00009-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1942515f1fa0d71b16ecc6db889cdc53ec54aad8f6c8e4aff2f70d65a3029c8f
+size 4806747752
diff --git a/consolidated-00010-of-00052.safetensors b/consolidated-00010-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa2d7aaa7e3852bd85b8be65e98e3049b8bc4ba9
--- /dev/null
+++ b/consolidated-00010-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7619390cdbd330b8091ff213c8ce4451520b1587a5617d6a03a34bff7a89d848
+size 4831938360
diff --git a/consolidated-00011-of-00052.safetensors b/consolidated-00011-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a59ecf4696d956985c4ec042d5de94f765e748c1
--- /dev/null
+++ b/consolidated-00011-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77bc338d7ee088677347e9dc38d38b41848c9d81ecd86cf492efe032124d0edc
+size 4831938368
diff --git a/consolidated-00012-of-00052.safetensors b/consolidated-00012-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f2eb425583df8f9df274b29dd1ed911897aeb210
--- /dev/null
+++ b/consolidated-00012-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d94e30be328632b4d38b186218c945ba77baef52f1bd73eb9267c03ce5d0e326
+size 4907410944
diff --git a/consolidated-00013-of-00052.safetensors b/consolidated-00013-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..10da70df9cf952e54f4171e733d532938f0287b9
--- /dev/null
+++ b/consolidated-00013-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:053a6f0df9dbdd364cbf045d1cd07f0272a42fea4ca37e815d4c499b5ea7658d
+size 4806747752
diff --git a/consolidated-00014-of-00052.safetensors b/consolidated-00014-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..90305a8d0cf580267e89e79057c57dda8133b946
--- /dev/null
+++ b/consolidated-00014-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45b0e874c30c4cfaca2ccc27bd32dc3787095cb6ea83e158072ffd4b92a2f8c4
+size 4831938352
diff --git a/consolidated-00015-of-00052.safetensors b/consolidated-00015-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a4cba64a06bf4aa66e922dea3678820ba2575b10
--- /dev/null
+++ b/consolidated-00015-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f71da1ddb1eccd3e25707b68b0b5c959a03cf15707c0882db52b34efa28fbf5a
+size 4831938368
diff --git a/consolidated-00016-of-00052.safetensors b/consolidated-00016-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..669e264c8d46405b26d68b28b0f034d1bf4a8f41
--- /dev/null
+++ b/consolidated-00016-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25a3b06f8388919bf2e35473ed257c3c1ac677de3ebd13c0067f34bdbb19dd9f
+size 4907410944
diff --git a/consolidated-00017-of-00052.safetensors b/consolidated-00017-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae979755098038ac21d6fd8894648136de4b2f64
--- /dev/null
+++ b/consolidated-00017-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b6d4e9e4d2b5fae5112913dc2d14176b453eefcd174225e44cf239e9923c6f3
+size 4806747752
diff --git a/consolidated-00018-of-00052.safetensors b/consolidated-00018-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49a28422406ba1b58c3ce64ae833796e161ca88f
--- /dev/null
+++ b/consolidated-00018-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6632bdd9a997bcccda3fc5a3180a3bb0c3135a6a093f3a5a0d66a61d2c48ba42
+size 4831938360
diff --git a/consolidated-00019-of-00052.safetensors b/consolidated-00019-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..276aee005ec4d3a58e9b5208a6297a906b813b3e
--- /dev/null
+++ b/consolidated-00019-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:163fe928babca95c7ddb83787ee89cd43511c4ccd85969cc805ccb15608cafbb
+size 4831938368
diff --git a/consolidated-00020-of-00052.safetensors b/consolidated-00020-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..80435f2e8744e9026f3592110091cc8084be6efb
--- /dev/null
+++ b/consolidated-00020-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2f4c6d68b160ed61d9e981fd9b11e8bd79512f3e1a33b06485ddd741785066b
+size 4907410928
diff --git a/consolidated-00021-of-00052.safetensors b/consolidated-00021-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6259cce81b9f53c46adf7b6e6c0089a713fdb71f
--- /dev/null
+++ b/consolidated-00021-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e49f8ac939c56400a688ffb1cee65d3132c4feb843cf5603daac0e70a726a948
+size 4806747752
diff --git a/consolidated-00022-of-00052.safetensors b/consolidated-00022-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2c74e2d8d6382edb2487a5c34c8173985172b496
--- /dev/null
+++ b/consolidated-00022-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db6b435252773be284d6f47c73db4a0fe5c4c3eda86137eb7089f0f7eda8b443
+size 4831938360
diff --git a/consolidated-00023-of-00052.safetensors b/consolidated-00023-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d7292b99def96c1dc45fe20ba05d7c50b2c48813
--- /dev/null
+++ b/consolidated-00023-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2035fd835090d2c40495eb471b1e5e1588688e94ed84da0de999247d6f39459c
+size 4831938368
diff --git a/consolidated-00024-of-00052.safetensors b/consolidated-00024-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..eaa5c4cddeb6237cdd407ab29c72614e98334b0a
--- /dev/null
+++ b/consolidated-00024-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4807b70682d459ea9e96786b769fb0d20b1d980a05a519b71020c54928d42357
+size 4907410944
diff --git a/consolidated-00025-of-00052.safetensors b/consolidated-00025-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ef69803e2bd4c3dd13d622afbb9b4e1a3c6bcd76
--- /dev/null
+++ b/consolidated-00025-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb0ad1cafb8352de7388868cb649193be5b65a117c09217dccb629ed688fd4e2
+size 4806747752
diff --git a/consolidated-00026-of-00052.safetensors b/consolidated-00026-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9fc6ce5915df3dbd0dd158a575084721d12c130
--- /dev/null
+++ b/consolidated-00026-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23eaa03186ca1d66fa8b77061c4e1081fbd29e2ae56925fefdb4b285b4667b1b
+size 4831938360
diff --git a/consolidated-00027-of-00052.safetensors b/consolidated-00027-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..827eae7b73c2020d682fc907459f7f0164e5e6f5
--- /dev/null
+++ b/consolidated-00027-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61823fb92235eaf80eeb84d13c110e8247555326eed9a55fe117d7f7302f19db
+size 4831938360
diff --git a/consolidated-00028-of-00052.safetensors b/consolidated-00028-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a9de554c975079d9d58bc48be0fe1944234d6f83
--- /dev/null
+++ b/consolidated-00028-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b05a9142ede92f974e3c1fd35a4fad80c876b56b3e9e0d344fde30066841309
+size 4907410944
diff --git a/consolidated-00029-of-00052.safetensors b/consolidated-00029-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7ff367c69435edc973a4b7c5eba3b0c7b5ce4bc0
--- /dev/null
+++ b/consolidated-00029-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48e313cf0a10eb525629501b18bea6507fbec86b74c2ae8596237fe26c48b981
+size 4806747752
diff --git a/consolidated-00030-of-00052.safetensors b/consolidated-00030-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ad382d485f90c39b3cb50ce48f6cd4cfe0d6efb1
--- /dev/null
+++ b/consolidated-00030-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:830454eddc1041e446957af9338121e9c8dabacfc7d8d9e5b3f9809d7ed76c90
+size 4831938360
diff --git a/consolidated-00031-of-00052.safetensors b/consolidated-00031-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2b7bb105111f581d0199fd4448b7fa5f1242ce57
--- /dev/null
+++ b/consolidated-00031-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12598a26a0f2f27a31cfabcf3486b237c41a3661d7c99075585eaf08fc3e322f
+size 4831938368
diff --git a/consolidated-00032-of-00052.safetensors b/consolidated-00032-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3cb414e1ab2524321079b90c2e3f35d02ff89a70
--- /dev/null
+++ b/consolidated-00032-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1037d34166fe81aac466e46c626e39d7e9e55fb1cf3e2b250b34d6e445439a8
+size 4907410936
diff --git a/consolidated-00033-of-00052.safetensors b/consolidated-00033-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..39fe4ea05154e55e49237ec74ed80df40028253b
--- /dev/null
+++ b/consolidated-00033-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9d947e3911918cb9b102ef2084c4a11bd07ebceade2d14036f6cafabac482c5
+size 4806747744
diff --git a/consolidated-00034-of-00052.safetensors b/consolidated-00034-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fbdccf4da5105d8c8630d662028b0e5b23e7ca48
--- /dev/null
+++ b/consolidated-00034-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:041c34b43a1a031fe5a7afdfa19342faea4dc87cc9734e86e5b928c8cd031930
+size 4831938360
diff --git a/consolidated-00035-of-00052.safetensors b/consolidated-00035-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e5c6fd24d865c048d87087f7126a6ce2bd5e98a8
--- /dev/null
+++ b/consolidated-00035-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67022cbacd1b97c6e62aed999353f2650deeddf208d2022166ffe2af486f8383
+size 4831938368
diff --git a/consolidated-00036-of-00052.safetensors b/consolidated-00036-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0958f50ecd54f44622796a98d9e23a276a08e80f
--- /dev/null
+++ b/consolidated-00036-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9736dac8138b1b406cebbf398a77d6b3facc9c907497679838670bf9b9569e21
+size 4907410944
diff --git a/consolidated-00037-of-00052.safetensors b/consolidated-00037-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3ce37c7270110ece3ce2358526f778ace86f897d
--- /dev/null
+++ b/consolidated-00037-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2bd12b7311133381984dc7b469775065bc112d74c64e88ff1e1295f52e53d0a
+size 4806747752
diff --git a/consolidated-00038-of-00052.safetensors b/consolidated-00038-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..abd2ecc5d24d57cba9be92e085f6fb5ce79a50b0
--- /dev/null
+++ b/consolidated-00038-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fceac917e5aa1e09e758914063cf83fba970e9e982e0a2a070005e2f578af94e
+size 4831938360
diff --git a/consolidated-00039-of-00052.safetensors b/consolidated-00039-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aae1a2eb6a9b83d1c9051763430b3891e3d7ac98
--- /dev/null
+++ b/consolidated-00039-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a367a7be06793addad3e0dc9c7b83245d380ed924319cef6357b457771a34244
+size 4831938360
diff --git a/consolidated-00040-of-00052.safetensors b/consolidated-00040-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2681e6b82d864c98f5652732926c41eb28885a42
--- /dev/null
+++ b/consolidated-00040-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97836f081eb3588c3edb566e3302396e1aef4243114267e0862baf562718786c
+size 4907410944
diff --git a/consolidated-00041-of-00052.safetensors b/consolidated-00041-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..31b927f7c30615efc3780c5a727262440c3cd4c2
--- /dev/null
+++ b/consolidated-00041-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03e48ac266de47dc8ebba45349c78f2b32400db82c5fdf55dfec1c6075dc05e7
+size 4806747752
diff --git a/consolidated-00042-of-00052.safetensors b/consolidated-00042-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2407deea5d233ac030925d2db8452e05e3dcbb23
--- /dev/null
+++ b/consolidated-00042-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2e8aa5c2cc44ff6c01df87c9b6f6f93e2cc957d58132b5d4503d201c8aeea78
+size 4831938360
diff --git a/consolidated-00043-of-00052.safetensors b/consolidated-00043-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e273594f09b8640fa27a7c2b5ef6188ba4c85e95
--- /dev/null
+++ b/consolidated-00043-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06d985f8dd53e057e103c66e5161de9ed0fd21c5a812a8dd04d565f0cdfdff71
+size 4831938368
diff --git a/consolidated-00044-of-00052.safetensors b/consolidated-00044-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..100189f4859509e9093f1652fa6e594459bdbc91
--- /dev/null
+++ b/consolidated-00044-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfe1545dd4d6f261eb1fd19b048bc8f5c53f20f752c085b57493e9957d29d782
+size 4907410944
diff --git a/consolidated-00045-of-00052.safetensors b/consolidated-00045-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e2c3a5b006e58fd7540c351b05aa12acadb40495
--- /dev/null
+++ b/consolidated-00045-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:116cd897a5b985d0a9fd18b1dae4a6b94e4ce44f510eaea226088bd6123155ba
+size 4806747744
diff --git a/consolidated-00046-of-00052.safetensors b/consolidated-00046-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..43045ba797aa4cd7f4afad18072302c2b85e1a51
--- /dev/null
+++ b/consolidated-00046-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:923d5d8583e4995e3b1645d125078d5bda5931f43d4ae8f1b8519d9d2316a82c
+size 4831938360
diff --git a/consolidated-00047-of-00052.safetensors b/consolidated-00047-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b9b5f3e16aa02105ff6f3ac8bbc2fd79d433f888
--- /dev/null
+++ b/consolidated-00047-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3f2756de88cb8fad99639c308d6f627e9d5ca3789a0d92ea0887e3d628e183e
+size 4831938368
diff --git a/consolidated-00048-of-00052.safetensors b/consolidated-00048-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b205aea486fea1363731ad7894b1b1ed4b78d794
--- /dev/null
+++ b/consolidated-00048-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:964513e7939d841fff065f7bb9894bfd1137a7598611eaab8817a687ef7a480d
+size 4907410944
diff --git a/consolidated-00049-of-00052.safetensors b/consolidated-00049-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9b60ae8db14231dbcad45459134ca819bfbc0453
--- /dev/null
+++ b/consolidated-00049-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d820091f7b0f392bd94f09ef6d68b35dd59155e6b133ea6bfeb89eebfd97d198
+size 4806747752
diff --git a/consolidated-00050-of-00052.safetensors b/consolidated-00050-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9c9973003a2c886351e4899ca8c2965850121176
--- /dev/null
+++ b/consolidated-00050-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9969dcbf1b3f3318ed5e313af4549c66dda318d22628c7432e4433604b9971d
+size 4831938360
diff --git a/consolidated-00051-of-00052.safetensors b/consolidated-00051-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7359c9069bdaa2fb9a794c14db800e170f08d155
--- /dev/null
+++ b/consolidated-00051-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5662ff90e487a08425504cb9b0a861243af2e7f1d90b845dcaf00829eefb3b83
+size 4999370480
diff --git a/consolidated-00052-of-00052.safetensors b/consolidated-00052-of-00052.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5b22ada47dd7e9ee1cecaf0259bb21a7b73cf94d
--- /dev/null
+++ b/consolidated-00052-of-00052.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e1f0e30670137d7862bba00199d90aa68247d5d468903b8aeb82eb0408f4c0f
+size 1070179416
diff --git a/consolidated.safetensors.index.json b/consolidated.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..e2e36b51da4cef0faded46ca6170467ac484698d
--- /dev/null
+++ b/consolidated.safetensors.index.json
@@ -0,0 +1,1166 @@
+{
+ "metadata": {
+ "total_size": 248269691648
+ },
+ "weight_map": {
+ "layers.0.attention.wk.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.attention.wo.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.attention.wq.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.attention.wv.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.attention_norm.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.feed_forward.w1.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.feed_forward.w2.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.feed_forward.w3.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.0.ffn_norm.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.attention.wk.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.attention.wo.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.attention.wq.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.attention.wv.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.attention_norm.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.feed_forward.w1.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.feed_forward.w2.weight": "consolidated-00001-of-00052.safetensors",
+ "layers.1.feed_forward.w3.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.1.ffn_norm.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.attention.wk.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.attention.wo.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.attention.wq.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.attention.wv.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.attention_norm.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.feed_forward.w1.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.feed_forward.w2.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.feed_forward.w3.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.10.ffn_norm.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.11.attention.wk.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.11.attention.wo.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.11.attention.wq.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.11.attention.wv.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.11.attention_norm.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.11.feed_forward.w1.weight": "consolidated-00002-of-00052.safetensors",
+ "layers.11.feed_forward.w2.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.11.feed_forward.w3.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.11.ffn_norm.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.attention.wk.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.attention.wo.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.attention.wq.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.attention.wv.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.attention_norm.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.feed_forward.w1.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.feed_forward.w2.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.feed_forward.w3.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.12.ffn_norm.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.13.attention.wk.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.13.attention.wo.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.13.attention.wq.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.13.attention.wv.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.13.attention_norm.weight": "consolidated-00003-of-00052.safetensors",
+ "layers.13.feed_forward.w1.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.13.feed_forward.w2.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.13.feed_forward.w3.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.13.ffn_norm.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.attention.wk.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.attention.wo.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.attention.wq.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.attention.wv.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.attention_norm.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.feed_forward.w1.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.feed_forward.w2.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.feed_forward.w3.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.14.ffn_norm.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.15.attention.wk.weight": "consolidated-00004-of-00052.safetensors",
+ "layers.15.attention.wo.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.15.attention.wq.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.15.attention.wv.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.15.attention_norm.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.15.feed_forward.w1.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.15.feed_forward.w2.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.15.feed_forward.w3.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.15.ffn_norm.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.attention.wk.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.attention.wo.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.attention.wq.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.attention.wv.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.attention_norm.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.feed_forward.w1.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.feed_forward.w2.weight": "consolidated-00005-of-00052.safetensors",
+ "layers.16.feed_forward.w3.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.16.ffn_norm.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.attention.wk.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.attention.wo.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.attention.wq.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.attention.wv.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.attention_norm.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.feed_forward.w1.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.feed_forward.w2.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.feed_forward.w3.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.17.ffn_norm.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.18.attention.wk.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.18.attention.wo.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.18.attention.wq.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.18.attention.wv.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.18.attention_norm.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.18.feed_forward.w1.weight": "consolidated-00006-of-00052.safetensors",
+ "layers.18.feed_forward.w2.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.18.feed_forward.w3.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.18.ffn_norm.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.attention.wk.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.attention.wo.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.attention.wq.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.attention.wv.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.attention_norm.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.feed_forward.w1.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.feed_forward.w2.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.feed_forward.w3.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.19.ffn_norm.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.2.attention.wk.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.2.attention.wo.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.2.attention.wq.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.2.attention.wv.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.2.attention_norm.weight": "consolidated-00007-of-00052.safetensors",
+ "layers.2.feed_forward.w1.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.2.feed_forward.w2.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.2.feed_forward.w3.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.2.ffn_norm.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.attention.wk.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.attention.wo.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.attention.wq.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.attention.wv.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.attention_norm.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.feed_forward.w1.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.feed_forward.w2.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.feed_forward.w3.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.20.ffn_norm.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.21.attention.wk.weight": "consolidated-00008-of-00052.safetensors",
+ "layers.21.attention.wo.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.21.attention.wq.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.21.attention.wv.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.21.attention_norm.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.21.feed_forward.w1.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.21.feed_forward.w2.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.21.feed_forward.w3.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.21.ffn_norm.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.attention.wk.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.attention.wo.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.attention.wq.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.attention.wv.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.attention_norm.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.feed_forward.w1.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.feed_forward.w2.weight": "consolidated-00009-of-00052.safetensors",
+ "layers.22.feed_forward.w3.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.22.ffn_norm.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.attention.wk.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.attention.wo.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.attention.wq.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.attention.wv.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.attention_norm.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.feed_forward.w1.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.feed_forward.w2.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.feed_forward.w3.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.23.ffn_norm.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.24.attention.wk.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.24.attention.wo.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.24.attention.wq.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.24.attention.wv.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.24.attention_norm.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.24.feed_forward.w1.weight": "consolidated-00010-of-00052.safetensors",
+ "layers.24.feed_forward.w2.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.24.feed_forward.w3.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.24.ffn_norm.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.attention.wk.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.attention.wo.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.attention.wq.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.attention.wv.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.attention_norm.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.feed_forward.w1.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.feed_forward.w2.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.feed_forward.w3.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.25.ffn_norm.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.26.attention.wk.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.26.attention.wo.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.26.attention.wq.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.26.attention.wv.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.26.attention_norm.weight": "consolidated-00011-of-00052.safetensors",
+ "layers.26.feed_forward.w1.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.26.feed_forward.w2.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.26.feed_forward.w3.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.26.ffn_norm.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.attention.wk.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.attention.wo.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.attention.wq.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.attention.wv.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.attention_norm.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.feed_forward.w1.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.feed_forward.w2.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.feed_forward.w3.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.27.ffn_norm.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.28.attention.wk.weight": "consolidated-00012-of-00052.safetensors",
+ "layers.28.attention.wo.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.28.attention.wq.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.28.attention.wv.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.28.attention_norm.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.28.feed_forward.w1.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.28.feed_forward.w2.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.28.feed_forward.w3.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.28.ffn_norm.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.attention.wk.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.attention.wo.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.attention.wq.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.attention.wv.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.attention_norm.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.feed_forward.w1.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.feed_forward.w2.weight": "consolidated-00013-of-00052.safetensors",
+ "layers.29.feed_forward.w3.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.29.ffn_norm.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.attention.wk.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.attention.wo.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.attention.wq.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.attention.wv.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.attention_norm.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.feed_forward.w1.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.feed_forward.w2.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.feed_forward.w3.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.3.ffn_norm.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.30.attention.wk.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.30.attention.wo.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.30.attention.wq.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.30.attention.wv.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.30.attention_norm.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.30.feed_forward.w1.weight": "consolidated-00014-of-00052.safetensors",
+ "layers.30.feed_forward.w2.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.30.feed_forward.w3.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.30.ffn_norm.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.attention.wk.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.attention.wo.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.attention.wq.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.attention.wv.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.attention_norm.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.feed_forward.w1.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.feed_forward.w2.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.feed_forward.w3.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.31.ffn_norm.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.32.attention.wk.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.32.attention.wo.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.32.attention.wq.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.32.attention.wv.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.32.attention_norm.weight": "consolidated-00015-of-00052.safetensors",
+ "layers.32.feed_forward.w1.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.32.feed_forward.w2.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.32.feed_forward.w3.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.32.ffn_norm.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.attention.wk.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.attention.wo.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.attention.wq.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.attention.wv.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.attention_norm.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.feed_forward.w1.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.feed_forward.w2.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.feed_forward.w3.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.33.ffn_norm.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.34.attention.wk.weight": "consolidated-00016-of-00052.safetensors",
+ "layers.34.attention.wo.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.34.attention.wq.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.34.attention.wv.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.34.attention_norm.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.34.feed_forward.w1.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.34.feed_forward.w2.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.34.feed_forward.w3.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.34.ffn_norm.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.attention.wk.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.attention.wo.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.attention.wq.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.attention.wv.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.attention_norm.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.feed_forward.w1.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.feed_forward.w2.weight": "consolidated-00017-of-00052.safetensors",
+ "layers.35.feed_forward.w3.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.35.ffn_norm.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.attention.wk.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.attention.wo.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.attention.wq.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.attention.wv.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.attention_norm.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.feed_forward.w1.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.feed_forward.w2.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.feed_forward.w3.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.36.ffn_norm.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.37.attention.wk.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.37.attention.wo.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.37.attention.wq.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.37.attention.wv.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.37.attention_norm.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.37.feed_forward.w1.weight": "consolidated-00018-of-00052.safetensors",
+ "layers.37.feed_forward.w2.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.37.feed_forward.w3.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.37.ffn_norm.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.attention.wk.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.attention.wo.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.attention.wq.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.attention.wv.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.attention_norm.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.feed_forward.w1.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.feed_forward.w2.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.feed_forward.w3.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.38.ffn_norm.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.39.attention.wk.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.39.attention.wo.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.39.attention.wq.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.39.attention.wv.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.39.attention_norm.weight": "consolidated-00019-of-00052.safetensors",
+ "layers.39.feed_forward.w1.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.39.feed_forward.w2.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.39.feed_forward.w3.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.39.ffn_norm.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.attention.wk.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.attention.wo.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.attention.wq.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.attention.wv.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.attention_norm.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.feed_forward.w1.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.feed_forward.w2.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.feed_forward.w3.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.4.ffn_norm.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.40.attention.wk.weight": "consolidated-00020-of-00052.safetensors",
+ "layers.40.attention.wo.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.40.attention.wq.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.40.attention.wv.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.40.attention_norm.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.40.feed_forward.w1.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.40.feed_forward.w2.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.40.feed_forward.w3.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.40.ffn_norm.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.attention.wk.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.attention.wo.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.attention.wq.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.attention.wv.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.attention_norm.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.feed_forward.w1.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.feed_forward.w2.weight": "consolidated-00021-of-00052.safetensors",
+ "layers.41.feed_forward.w3.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.41.ffn_norm.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.attention.wk.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.attention.wo.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.attention.wq.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.attention.wv.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.attention_norm.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.feed_forward.w1.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.feed_forward.w2.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.feed_forward.w3.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.42.ffn_norm.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.43.attention.wk.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.43.attention.wo.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.43.attention.wq.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.43.attention.wv.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.43.attention_norm.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.43.feed_forward.w1.weight": "consolidated-00022-of-00052.safetensors",
+ "layers.43.feed_forward.w2.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.43.feed_forward.w3.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.43.ffn_norm.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.attention.wk.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.attention.wo.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.attention.wq.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.attention.wv.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.attention_norm.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.feed_forward.w1.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.feed_forward.w2.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.feed_forward.w3.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.44.ffn_norm.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.45.attention.wk.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.45.attention.wo.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.45.attention.wq.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.45.attention.wv.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.45.attention_norm.weight": "consolidated-00023-of-00052.safetensors",
+ "layers.45.feed_forward.w1.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.45.feed_forward.w2.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.45.feed_forward.w3.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.45.ffn_norm.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.attention.wk.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.attention.wo.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.attention.wq.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.attention.wv.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.attention_norm.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.feed_forward.w1.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.feed_forward.w2.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.feed_forward.w3.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.46.ffn_norm.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.47.attention.wk.weight": "consolidated-00024-of-00052.safetensors",
+ "layers.47.attention.wo.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.47.attention.wq.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.47.attention.wv.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.47.attention_norm.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.47.feed_forward.w1.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.47.feed_forward.w2.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.47.feed_forward.w3.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.47.ffn_norm.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.attention.wk.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.attention.wo.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.attention.wq.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.attention.wv.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.attention_norm.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.feed_forward.w1.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.feed_forward.w2.weight": "consolidated-00025-of-00052.safetensors",
+ "layers.48.feed_forward.w3.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.48.ffn_norm.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.attention.wk.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.attention.wo.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.attention.wq.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.attention.wv.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.attention_norm.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.feed_forward.w1.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.feed_forward.w2.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.feed_forward.w3.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.49.ffn_norm.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.5.attention.wk.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.5.attention.wo.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.5.attention.wq.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.5.attention.wv.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.5.attention_norm.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.5.feed_forward.w1.weight": "consolidated-00026-of-00052.safetensors",
+ "layers.5.feed_forward.w2.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.5.feed_forward.w3.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.5.ffn_norm.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.attention.wk.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.attention.wo.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.attention.wq.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.attention.wv.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.attention_norm.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.feed_forward.w1.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.feed_forward.w2.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.feed_forward.w3.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.50.ffn_norm.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.51.attention.wk.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.51.attention.wo.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.51.attention.wq.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.51.attention.wv.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.51.attention_norm.weight": "consolidated-00027-of-00052.safetensors",
+ "layers.51.feed_forward.w1.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.51.feed_forward.w2.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.51.feed_forward.w3.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.51.ffn_norm.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.attention.wk.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.attention.wo.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.attention.wq.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.attention.wv.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.attention_norm.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.feed_forward.w1.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.feed_forward.w2.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.feed_forward.w3.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.52.ffn_norm.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.53.attention.wk.weight": "consolidated-00028-of-00052.safetensors",
+ "layers.53.attention.wo.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.53.attention.wq.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.53.attention.wv.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.53.attention_norm.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.53.feed_forward.w1.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.53.feed_forward.w2.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.53.feed_forward.w3.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.53.ffn_norm.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.attention.wk.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.attention.wo.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.attention.wq.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.attention.wv.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.attention_norm.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.feed_forward.w1.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.feed_forward.w2.weight": "consolidated-00029-of-00052.safetensors",
+ "layers.54.feed_forward.w3.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.54.ffn_norm.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.attention.wk.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.attention.wo.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.attention.wq.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.attention.wv.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.attention_norm.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.feed_forward.w1.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.feed_forward.w2.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.feed_forward.w3.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.55.ffn_norm.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.56.attention.wk.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.56.attention.wo.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.56.attention.wq.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.56.attention.wv.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.56.attention_norm.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.56.feed_forward.w1.weight": "consolidated-00030-of-00052.safetensors",
+ "layers.56.feed_forward.w2.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.56.feed_forward.w3.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.56.ffn_norm.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.attention.wk.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.attention.wo.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.attention.wq.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.attention.wv.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.attention_norm.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.feed_forward.w1.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.feed_forward.w2.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.feed_forward.w3.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.57.ffn_norm.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.58.attention.wk.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.58.attention.wo.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.58.attention.wq.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.58.attention.wv.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.58.attention_norm.weight": "consolidated-00031-of-00052.safetensors",
+ "layers.58.feed_forward.w1.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.58.feed_forward.w2.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.58.feed_forward.w3.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.58.ffn_norm.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.attention.wk.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.attention.wo.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.attention.wq.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.attention.wv.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.attention_norm.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.feed_forward.w1.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.feed_forward.w2.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.feed_forward.w3.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.59.ffn_norm.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.6.attention.wk.weight": "consolidated-00032-of-00052.safetensors",
+ "layers.6.attention.wo.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.6.attention.wq.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.6.attention.wv.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.6.attention_norm.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.6.feed_forward.w1.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.6.feed_forward.w2.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.6.feed_forward.w3.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.6.ffn_norm.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.attention.wk.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.attention.wo.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.attention.wq.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.attention.wv.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.attention_norm.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.feed_forward.w1.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.feed_forward.w2.weight": "consolidated-00033-of-00052.safetensors",
+ "layers.60.feed_forward.w3.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.60.ffn_norm.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.attention.wk.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.attention.wo.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.attention.wq.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.attention.wv.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.attention_norm.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.feed_forward.w1.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.feed_forward.w2.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.feed_forward.w3.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.61.ffn_norm.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.62.attention.wk.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.62.attention.wo.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.62.attention.wq.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.62.attention.wv.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.62.attention_norm.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.62.feed_forward.w1.weight": "consolidated-00034-of-00052.safetensors",
+ "layers.62.feed_forward.w2.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.62.feed_forward.w3.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.62.ffn_norm.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.attention.wk.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.attention.wo.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.attention.wq.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.attention.wv.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.attention_norm.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.feed_forward.w1.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.feed_forward.w2.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.feed_forward.w3.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.63.ffn_norm.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.64.attention.wk.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.64.attention.wo.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.64.attention.wq.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.64.attention.wv.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.64.attention_norm.weight": "consolidated-00035-of-00052.safetensors",
+ "layers.64.feed_forward.w1.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.64.feed_forward.w2.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.64.feed_forward.w3.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.64.ffn_norm.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.attention.wk.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.attention.wo.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.attention.wq.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.attention.wv.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.attention_norm.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.feed_forward.w1.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.feed_forward.w2.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.feed_forward.w3.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.65.ffn_norm.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.66.attention.wk.weight": "consolidated-00036-of-00052.safetensors",
+ "layers.66.attention.wo.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.66.attention.wq.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.66.attention.wv.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.66.attention_norm.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.66.feed_forward.w1.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.66.feed_forward.w2.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.66.feed_forward.w3.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.66.ffn_norm.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.attention.wk.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.attention.wo.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.attention.wq.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.attention.wv.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.attention_norm.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.feed_forward.w1.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.feed_forward.w2.weight": "consolidated-00037-of-00052.safetensors",
+ "layers.67.feed_forward.w3.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.67.ffn_norm.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.attention.wk.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.attention.wo.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.attention.wq.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.attention.wv.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.attention_norm.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.feed_forward.w1.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.feed_forward.w2.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.feed_forward.w3.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.68.ffn_norm.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.69.attention.wk.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.69.attention.wo.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.69.attention.wq.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.69.attention.wv.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.69.attention_norm.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.69.feed_forward.w1.weight": "consolidated-00038-of-00052.safetensors",
+ "layers.69.feed_forward.w2.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.69.feed_forward.w3.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.69.ffn_norm.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.attention.wk.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.attention.wo.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.attention.wq.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.attention.wv.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.attention_norm.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.feed_forward.w1.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.feed_forward.w2.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.feed_forward.w3.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.7.ffn_norm.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.70.attention.wk.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.70.attention.wo.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.70.attention.wq.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.70.attention.wv.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.70.attention_norm.weight": "consolidated-00039-of-00052.safetensors",
+ "layers.70.feed_forward.w1.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.70.feed_forward.w2.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.70.feed_forward.w3.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.70.ffn_norm.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.attention.wk.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.attention.wo.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.attention.wq.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.attention.wv.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.attention_norm.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.feed_forward.w1.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.feed_forward.w2.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.feed_forward.w3.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.71.ffn_norm.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.72.attention.wk.weight": "consolidated-00040-of-00052.safetensors",
+ "layers.72.attention.wo.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.72.attention.wq.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.72.attention.wv.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.72.attention_norm.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.72.feed_forward.w1.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.72.feed_forward.w2.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.72.feed_forward.w3.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.72.ffn_norm.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.attention.wk.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.attention.wo.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.attention.wq.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.attention.wv.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.attention_norm.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.feed_forward.w1.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.feed_forward.w2.weight": "consolidated-00041-of-00052.safetensors",
+ "layers.73.feed_forward.w3.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.73.ffn_norm.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.attention.wk.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.attention.wo.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.attention.wq.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.attention.wv.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.attention_norm.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.feed_forward.w1.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.feed_forward.w2.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.feed_forward.w3.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.74.ffn_norm.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.75.attention.wk.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.75.attention.wo.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.75.attention.wq.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.75.attention.wv.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.75.attention_norm.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.75.feed_forward.w1.weight": "consolidated-00042-of-00052.safetensors",
+ "layers.75.feed_forward.w2.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.75.feed_forward.w3.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.75.ffn_norm.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.attention.wk.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.attention.wo.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.attention.wq.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.attention.wv.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.attention_norm.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.feed_forward.w1.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.feed_forward.w2.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.feed_forward.w3.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.76.ffn_norm.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.77.attention.wk.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.77.attention.wo.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.77.attention.wq.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.77.attention.wv.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.77.attention_norm.weight": "consolidated-00043-of-00052.safetensors",
+ "layers.77.feed_forward.w1.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.77.feed_forward.w2.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.77.feed_forward.w3.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.77.ffn_norm.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.attention.wk.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.attention.wo.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.attention.wq.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.attention.wv.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.attention_norm.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.feed_forward.w1.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.feed_forward.w2.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.feed_forward.w3.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.78.ffn_norm.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.79.attention.wk.weight": "consolidated-00044-of-00052.safetensors",
+ "layers.79.attention.wo.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.79.attention.wq.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.79.attention.wv.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.79.attention_norm.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.79.feed_forward.w1.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.79.feed_forward.w2.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.79.feed_forward.w3.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.79.ffn_norm.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.attention.wk.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.attention.wo.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.attention.wq.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.attention.wv.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.attention_norm.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.feed_forward.w1.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.feed_forward.w2.weight": "consolidated-00045-of-00052.safetensors",
+ "layers.8.feed_forward.w3.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.8.ffn_norm.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.attention.wk.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.attention.wo.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.attention.wq.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.attention.wv.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.attention_norm.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.feed_forward.w1.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.feed_forward.w2.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.feed_forward.w3.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.80.ffn_norm.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.81.attention.wk.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.81.attention.wo.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.81.attention.wq.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.81.attention.wv.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.81.attention_norm.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.81.feed_forward.w1.weight": "consolidated-00046-of-00052.safetensors",
+ "layers.81.feed_forward.w2.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.81.feed_forward.w3.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.81.ffn_norm.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.attention.wk.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.attention.wo.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.attention.wq.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.attention.wv.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.attention_norm.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.feed_forward.w1.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.feed_forward.w2.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.feed_forward.w3.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.82.ffn_norm.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.83.attention.wk.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.83.attention.wo.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.83.attention.wq.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.83.attention.wv.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.83.attention_norm.weight": "consolidated-00047-of-00052.safetensors",
+ "layers.83.feed_forward.w1.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.83.feed_forward.w2.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.83.feed_forward.w3.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.83.ffn_norm.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.attention.wk.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.attention.wo.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.attention.wq.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.attention.wv.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.attention_norm.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.feed_forward.w1.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.feed_forward.w2.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.feed_forward.w3.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.84.ffn_norm.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.85.attention.wk.weight": "consolidated-00048-of-00052.safetensors",
+ "layers.85.attention.wo.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.85.attention.wq.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.85.attention.wv.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.85.attention_norm.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.85.feed_forward.w1.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.85.feed_forward.w2.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.85.feed_forward.w3.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.85.ffn_norm.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.attention.wk.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.attention.wo.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.attention.wq.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.attention.wv.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.attention_norm.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.feed_forward.w1.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.feed_forward.w2.weight": "consolidated-00049-of-00052.safetensors",
+ "layers.86.feed_forward.w3.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.86.ffn_norm.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.attention.wk.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.attention.wo.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.attention.wq.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.attention.wv.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.attention_norm.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.feed_forward.w1.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.feed_forward.w2.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.feed_forward.w3.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.87.ffn_norm.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.9.attention.wk.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.9.attention.wo.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.9.attention.wq.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.9.attention.wv.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.9.attention_norm.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.9.feed_forward.w1.weight": "consolidated-00050-of-00052.safetensors",
+ "layers.9.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "layers.9.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "layers.9.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "norm.weight": "consolidated-00051-of-00052.safetensors",
+ "output.weight": "consolidated-00051-of-00052.safetensors",
+ "tok_embeddings.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.ln_pre.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.patch_conv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.0.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.1.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.10.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.11.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.12.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.13.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.14.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.15.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.16.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.17.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.18.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.19.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.2.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.20.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.21.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.22.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.23.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.24.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.25.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.26.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.27.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.28.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.29.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.3.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.30.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.31.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.32.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.33.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.attention.wv.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.attention_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.feed_forward.w1.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.feed_forward.w2.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.feed_forward.w3.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.34.ffn_norm.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.attention.wk.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.attention.wo.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.attention.wq.weight": "consolidated-00051-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.35.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.36.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.37.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.38.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.39.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.4.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.5.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.6.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.7.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.8.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.attention.wk.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.attention.wo.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.attention.wq.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.attention.wv.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.attention_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.feed_forward.w1.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.feed_forward.w2.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.feed_forward.w3.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_encoder.transformer.layers.9.ffn_norm.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_language_adapter.w_in.weight": "consolidated-00052-of-00052.safetensors",
+ "vision_language_adapter.w_out.weight": "consolidated-00052-of-00052.safetensors"
+ }
+}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..07d5fb2413f2590faac9594acda00e72c863c8fb
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.47.0.dev0"
+}
diff --git a/params.json b/params.json
new file mode 100644
index 0000000000000000000000000000000000000000..f3ef37418b1bc94a42a20e5f60be63bbc855640a
--- /dev/null
+++ b/params.json
@@ -0,0 +1,22 @@
+{
+ "dim": 12288,
+ "n_layers": 88,
+ "head_dim": 128,
+ "hidden_dim": 28672,
+ "n_heads": 96,
+ "n_kv_heads": 8,
+ "rope_theta": 1000000000.0,
+ "norm_eps": 1e-05,
+ "vocab_size": 32768,
+ "vision_encoder": {
+ "hidden_size": 1408,
+ "num_channels": 3,
+ "image_size": 1024,
+ "patch_size": 16,
+ "rope_theta": 10000.0,
+ "intermediate_size": 6144,
+ "num_hidden_layers": 40,
+ "num_attention_heads": 16,
+ "image_token_id": 10
+ }
+}
diff --git a/preprocessor_config.json b/preprocessor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..3e916d7750c22814d4c0b1f3f07e05d572160792
--- /dev/null
+++ b/preprocessor_config.json
@@ -0,0 +1,27 @@
+{
+ "do_convert_rgb": true,
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.48145466,
+ 0.4578275,
+ 0.40821073
+ ],
+ "image_processor_type": "PixtralImageProcessor",
+ "image_std": [
+ 0.26862954,
+ 0.26130258,
+ 0.27577711
+ ],
+ "patch_size": {
+ "height": 16,
+ "width": 16
+ },
+ "processor_class": "PixtralProcessor",
+ "resample": 3,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "longest_edge": 1024
+ }
+}
diff --git a/processor_config.json b/processor_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c9b049dbaf48944f213124e923048ff5b6af9031
--- /dev/null
+++ b/processor_config.json
@@ -0,0 +1,7 @@
+{
+ "image_break_token": "[IMG_BREAK]",
+ "image_end_token": "[IMG_END]",
+ "image_token": "[IMG]",
+ "patch_size": 16,
+ "processor_class": "PixtralProcessor"
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..451134b2ddc2e78555d1e857518c54b4bdc2e87d
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..580f1473d02d937dcde5eac8ecdb614ca207c1bc
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84f33e6f52b2833e8cc17229af8eea363f640a898f19a48184a2c7f6f5a88337
+size 17077329
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..068cd3e09df922812e2bbc83948cf648eb2d1f7b
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b968b8dc352f42192367337c78ccc61e1eaddc6d641a579372d4f20694beb7a
+size 587562
diff --git a/tokenizer.model.v7m1 b/tokenizer.model.v7m1
new file mode 100644
index 0000000000000000000000000000000000000000..d23856f8c98075bb81826fba7e64415eedf520fd
Binary files /dev/null and b/tokenizer.model.v7m1 differ
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..0b7e9f15c526cf6df1201d2ea05ee072743d224e
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,43 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": true,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "extra_special_tokens": {},
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}