Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- LICENSE +50 -0
- README.md +148 -0
- benchmark_performance_comparison.png +0 -0
- config.json +29 -0
- generation_config.json +7 -0
- helpingai-9b.Q4_0.gguf +3 -0
- model-00001-of-00002.safetensors +3 -0
- model-00002-of-00002.safetensors +3 -0
- model.safetensors.index.json +442 -0
- special_tokens_map.json +39 -0
- tokenizer.json +0 -0
- tokenizer_config.json +62 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
helpingai-9b.Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
************************************************
|
2 |
+
**** HelpingAI License ****
|
3 |
+
************************************************
|
4 |
+
|
5 |
+
Version 2.0
|
6 |
+
|
7 |
+
Developed by Abhay Koul
|
8 |
+
|
9 |
+
### Preamble
|
10 |
+
|
11 |
+
The HelpingAI License governs the use of HelpingAI's digital assets, including but not limited to software, scripts, datasets, documents, images, audio recordings, and videos. The HelpingAI License aims to provide clear, comprehensive terms for accessing, modifying, and sharing resources, while promoting ethical development practices.
|
12 |
+
|
13 |
+
### Grant of Rights
|
14 |
+
|
15 |
+
Under the HelpingAI License, HelpingAI grants you the rights to copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Content, provided you comply with the terms and conditions outlined in this document.
|
16 |
+
|
17 |
+
### Terms and Conditions
|
18 |
+
|
19 |
+
To exercise the rights granted in the previous section, you must adhere to the following terms and conditions:
|
20 |
+
|
21 |
+
2.1. **Redistribution of Source Code.**
|
22 |
+
If you redistribute the Source Code, you must include the complete HelpingAI License with your distribution. You must also add clear notifications in all modified files stating:
|
23 |
+
|
24 |
+
> "This Work is released under the HelpingAI License v2.0."
|
25 |
+
|
26 |
+
2.2. **Distribution in Binary Form.**
|
27 |
+
If you distribute Binaries derived from the Source Code, you must include the following statement in your distribution:
|
28 |
+
|
29 |
+
> "This Work is based on the HelpingAI Licensed Work, under the HelpingAI License v2.0."
|
30 |
+
|
31 |
+
2.3. **Notification of Changes.**
|
32 |
+
You must clearly indicate any modifications you make to the Source Code or Documentation, including detailed comments about the nature and extent of the changes. Include the date and originator of the modifications.
|
33 |
+
|
34 |
+
2.4. **Branding Attribution.**
|
35 |
+
You must not remove or alter any HelpingAI branding, logos, or notices included in the Content without explicit prior consent from HelpingAI.
|
36 |
+
|
37 |
+
2.5. **Disclaimer of Warranty.**
|
38 |
+
The Content is provided "AS IS," without any implied warranties, including but not limited to warranties of merchantability, fitness for a particular purpose, and non-infringement.
|
39 |
+
|
40 |
+
2.6. **Limitation of Liability.**
|
41 |
+
To the maximum extent permitted by law, neither HelpingAI nor any contributor shall be liable for any loss, personal injury, property damage, or any indirect, special, incidental, or consequential damages arising from or related to the use of the Content.
|
42 |
+
|
43 |
+
2.7. **Governing Law.**
|
44 |
+
This HelpingAI License shall be governed and construed in accordance with the laws of the jurisdiction where HelpingAI primarily operates.
|
45 |
+
|
46 |
+
### Definitions
|
47 |
+
|
48 |
+
3.1. **"Source Code"** refers to the preferred form for making modifications to the Content, typically represented by human-readable programming languages, scripts, or documentation formats.
|
49 |
+
|
50 |
+
3.2. **"Binaries"** refers to compiled forms of the Source Code, such as executables, libraries, or similar artifacts produced from the Source Code.
|
README.md
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: other
|
3 |
+
license_name: helpingai
|
4 |
+
license_link: LICENSE
|
5 |
+
pipeline_tag: text-generation
|
6 |
+
tags:
|
7 |
+
- HelpingAI
|
8 |
+
- Emotionally Intelligent
|
9 |
+
- EQ
|
10 |
+
datasets:
|
11 |
+
- OEvortex/SentimentSynth
|
12 |
+
- OEvortex/EmotionalIntelligence-10K
|
13 |
+
---
|
14 |
+
|
15 |
+
# HelpingAI-9B: Emotionally Intelligent Conversational AI
|
16 |
+
|
17 |
+
![logo](https://huggingface.co/OEvortex/HelpingAI-3B/resolve/main/HelpingAI.png)
|
18 |
+
|
19 |
+
## Overview
|
20 |
+
HelpingAI-9B is a large language model designed for emotionally intelligent conversational interactions. It is trained to engage users with empathy, understanding, and supportive dialogue across a wide range of topics and contexts. The model aims to provide a supportive AI companion that can attune to users' emotional states and communicative needs.
|
21 |
+
|
22 |
+
## Objectives
|
23 |
+
- Engage in open-ended dialogue while displaying emotional intelligence
|
24 |
+
- Recognize and validate user emotions and emotional contexts
|
25 |
+
- Provide supportive, empathetic, and psychologically grounded responses
|
26 |
+
- Avoid insensitive, harmful, or unethical speech
|
27 |
+
- Continuously improve emotional awareness and dialogue skills
|
28 |
+
|
29 |
+
## Methodology
|
30 |
+
HelpingAI-9B is based on the HelpingAI series and further trained using:
|
31 |
+
- Supervised learning on large dialogue datasets with emotional labeling
|
32 |
+
- Reinforcement learning with a reward model favoring emotionally supportive responses
|
33 |
+
- Constitution training to instill stable and beneficial objectives
|
34 |
+
- Knowledge augmentation from psychological resources on emotional intelligence
|
35 |
+
|
36 |
+
## Emotional Quotient (EQ)
|
37 |
+
HelpingAI-9B has achieved an impressive Emotional Quotient (EQ) of 89.23, surpassing almost all AI models in emotional intelligence. This EQ score reflects its advanced ability to understand and respond to human emotions in a supportive and empathetic manner.
|
38 |
+
|
39 |
+
![benchmarks](benchmark_performance_comparison.png)
|
40 |
+
|
41 |
+
## Usage code
|
42 |
+
```python
|
43 |
+
import torch
|
44 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
|
45 |
+
|
46 |
+
# Let's bring in the big guns! Our super cool HelpingAI-9B model
|
47 |
+
model = AutoModelForCausalLM.from_pretrained("OEvortex/HelpingAI-9B").to("cuda")
|
48 |
+
|
49 |
+
# We also need the special HelpingAI translator to understand our chats
|
50 |
+
tokenizer = AutoTokenizer.from_pretrained("OEvortex/HelpingAI-9B")
|
51 |
+
|
52 |
+
# This TextStreamer thingy is our secret weapon for super smooth conversation flow
|
53 |
+
streamer = TextStreamer(tokenizer)
|
54 |
+
|
55 |
+
# Now, here comes the magic! ✨ This is the basic template for our chat
|
56 |
+
prompt = """
|
57 |
+
<|im_start|>system: {system}
|
58 |
+
<|im_end|>
|
59 |
+
<|im_start|>user: {insaan}
|
60 |
+
<|im_end|>
|
61 |
+
<|im_start|>assistant:
|
62 |
+
"""
|
63 |
+
|
64 |
+
# Okay, enough chit-chat, let's get down to business! Here's what will be our system prompt
|
65 |
+
system = "You are HelpingAI a emotional AI always answer my question in HelpingAI style"
|
66 |
+
|
67 |
+
|
68 |
+
# And the insaan is curious (like you!). "Insaan" means "human" in Hindi.
|
69 |
+
insaan = "I'm excited because I just got accepted into my dream school! I wanted to share the good news with someone."
|
70 |
+
|
71 |
+
# Now we combine system and user messages into the template, like adding sprinkles to our conversation cupcake
|
72 |
+
prompt = prompt.format(system=system, insaan=insaan)
|
73 |
+
|
74 |
+
# Time to chat! We'll use the tokenizer to translate our text into a language the model understands
|
75 |
+
inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False).to("cuda")
|
76 |
+
|
77 |
+
# Here comes the fun part! Let's unleash the power of HelpingAI-9B to generate some awesome text
|
78 |
+
generated_text = model.generate(**inputs, max_length=3084, top_p=0.95, do_sample=True, temperature=0.6, use_cache=True, streamer=streamer)
|
79 |
+
|
80 |
+
|
81 |
+
```
|
82 |
+
*Directly using this model from GGUF*
|
83 |
+
|
84 |
+
```python
|
85 |
+
%pip install -U 'webscout[local]'
|
86 |
+
|
87 |
+
from webscout.Local.utils import download_model
|
88 |
+
from webscout.Local.model import Model
|
89 |
+
from webscout.Local.thread import Thread
|
90 |
+
from webscout.Local import formats
|
91 |
+
from webscout.Local.samplers import SamplerSettings
|
92 |
+
|
93 |
+
from dotenv import load_dotenv; load_dotenv()
|
94 |
+
import os
|
95 |
+
|
96 |
+
|
97 |
+
# 1. Download the model
|
98 |
+
repo_id = "OEvortex/HelpingAI-9B"
|
99 |
+
filename = "helpingai-9b.Q4_0.gguf"
|
100 |
+
model_path = download_model(repo_id, filename, os.environ.get("hf_token"))
|
101 |
+
|
102 |
+
# 2. Load the model
|
103 |
+
model = Model(model_path, n_gpu_layers=0)
|
104 |
+
|
105 |
+
# 3. Define your system prompt
|
106 |
+
system_prompt = "You are HelpingAI a emotional AI always answer my question in HelpingAI style"
|
107 |
+
|
108 |
+
# 4. Create a custom chatml format with your system prompt
|
109 |
+
custom_chatml = formats.chatml.copy()
|
110 |
+
custom_chatml['system_content'] = system_prompt
|
111 |
+
|
112 |
+
# 5. Define your sampler settings (optional)
|
113 |
+
sampler = SamplerSettings(temp=0.7, top_p=0.9) # Adjust these values as needed
|
114 |
+
|
115 |
+
# 6. Create a Thread with the custom format and sampler
|
116 |
+
thread = Thread(model, custom_chatml, sampler=sampler)
|
117 |
+
|
118 |
+
# 7. Start interacting with the model
|
119 |
+
thread.interact(header="🌟 HelpingAI-9B: Emotionally Intelligent Conversational AI 🚀", color=True)
|
120 |
+
```
|
121 |
+
## Example Dialogue
|
122 |
+
> Express joy and excitement about visiting a new place.
|
123 |
+
|
124 |
+
Yay! I'm super excited to visit this new place! 🎉 It sounds amazing and I can't wait to explore and have fun there! This is going to be such an exciting adventure! 🌟 Let's go, let's do this! ✨
|
125 |
+
|
126 |
+
> I'm feeling really down today. Nothing seems to be going right.
|
127 |
+
|
128 |
+
Oh no, I'm sorry to hear that you're feeling down. It happens sometimes, but remember that it's okay to feel sad. If you want to talk about what's bothering you, I'm here to listen and offer support. We can find a way through this together! 🌈
|
129 |
+
|
130 |
+
> I'm excited because I just got accepted into my dream school! I wanted to share the good news with someone.
|
131 |
+
|
132 |
+
Wow, that's amazing! 🎉 Congratulations on getting accepted into your dream school! That's such an incredible accomplishment and I'm really happy for you! Keep up the hard work and enjoy the journey ahead! 🌟 If there's anything else I can do to help, just let me know! 😊
|
133 |
+
|
134 |
+
> My best friend just got into an argument with me and I don't know what to do. How can I fix this situation?
|
135 |
+
|
136 |
+
Oh no, it sounds like you both need some time to cool off. Take a step back and give each other some space. Then, when things have calmed down, try talking things through in a calm and respectful manner. Communication is key for resolving conflicts, so let's work towards finding a resolution that works for both of you! 🌟 If you need any advice or support, I'm here to help! 😊
|
137 |
+
|
138 |
+
> How can you identify the emotions of others based on their body language and facial expressions?
|
139 |
+
|
140 |
+
Identifying the emotions of others is a great skill! 🤔 You can look for body language cues like frowns, smiles, or tense posture. Facial expressions also give away a lot about someone's feelings - watch out for changes in their eyes, mouth, and facial muscles. It's all about being observant and empathetic to understand others better! 😊
|
141 |
+
|
142 |
+
> What kind of safety devices do I need to install in my house?
|
143 |
+
|
144 |
+
Installing security devices in your home can bring peace of mind and keep you safe. Some essential safety devices include a home alarm system, smoke detectors, carbon monoxide detectors, motion sensors, and surveillance cameras. It's always good to take precautions and ensure that your home is secure! 🌟 If you need help choosing the right security devices for your family, I can definitely point you in the right direction! 😊
|
145 |
+
|
146 |
+
> How can your own feelings affect your ability to empathize with someone else?
|
147 |
+
|
148 |
+
Our own emotions can impact our ability to empathize with others because they shape how we perceive and interpret other people's experiences. If you're feeling happy or positive, it may be easier to see the good in others and offer support. On the other hand, if you're feeling down or stressed, it might be harder to put yourself in someone else's shoes. It's important to acknowledge your own feelings and work on being open and understanding towards others! 🌟
|
benchmark_performance_comparison.png
ADDED
config.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"LlamaForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 6,
|
8 |
+
"eos_token_id": 7,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 11008,
|
13 |
+
"max_position_embeddings": 200000,
|
14 |
+
"mlp_bias": false,
|
15 |
+
"model_type": "llama",
|
16 |
+
"num_attention_heads": 32,
|
17 |
+
"num_hidden_layers": 48,
|
18 |
+
"num_key_value_heads": 4,
|
19 |
+
"pad_token_id": 7,
|
20 |
+
"pretraining_tp": 1,
|
21 |
+
"rms_norm_eps": 1e-06,
|
22 |
+
"rope_scaling": null,
|
23 |
+
"rope_theta": 5000000.0,
|
24 |
+
"tie_word_embeddings": false,
|
25 |
+
"torch_dtype": "float16",
|
26 |
+
"transformers_version": "4.41.0",
|
27 |
+
"use_cache": false,
|
28 |
+
"vocab_size": 64000
|
29 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 6,
|
4 |
+
"eos_token_id": 7,
|
5 |
+
"pad_token_id": 7,
|
6 |
+
"transformers_version": "4.41.0"
|
7 |
+
}
|
helpingai-9b.Q4_0.gguf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e647b128346d067038e083a3edc79651905ba47a5d60ecdf6eb3c2cfffb24275
|
3 |
+
size 5036994912
|
model-00001-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5780e868d774ce2b44f44ed4365a45c66d33db028195d38396f18d9f3876caae
|
3 |
+
size 9943068568
|
model-00002-of-00002.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd6c845dfbb48232a031573461a64427dea332ab42951e9bc74ca6a8f4f730eb
|
3 |
+
size 7715796040
|
model.safetensors.index.json
ADDED
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 17658814464
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"lm_head.weight": "model-00002-of-00002.safetensors",
|
7 |
+
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
13 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
14 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
15 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
16 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
17 |
+
"model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
18 |
+
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
19 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
20 |
+
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
21 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
22 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
23 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
24 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
25 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
26 |
+
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
27 |
+
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
28 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
29 |
+
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
30 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
31 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
32 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
33 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
34 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
35 |
+
"model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
36 |
+
"model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
37 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
38 |
+
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
39 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
40 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
41 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
42 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
43 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
44 |
+
"model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
45 |
+
"model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
46 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
47 |
+
"model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
48 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
49 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
50 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
51 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
52 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
53 |
+
"model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
54 |
+
"model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
55 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
56 |
+
"model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
57 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
58 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
59 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
60 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
61 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
62 |
+
"model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
63 |
+
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
64 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
65 |
+
"model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
66 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
67 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
68 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
69 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
70 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
71 |
+
"model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
72 |
+
"model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
73 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
74 |
+
"model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
75 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
76 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
77 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
78 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
79 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
80 |
+
"model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
81 |
+
"model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
82 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
83 |
+
"model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
84 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
85 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
86 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
87 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
88 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
89 |
+
"model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
90 |
+
"model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
91 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
92 |
+
"model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
93 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
94 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
95 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
96 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
97 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
98 |
+
"model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
99 |
+
"model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
100 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
101 |
+
"model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
102 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
103 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
104 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
105 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
106 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
107 |
+
"model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
108 |
+
"model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
109 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
110 |
+
"model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
111 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
112 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
113 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
114 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
115 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
116 |
+
"model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
117 |
+
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
118 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
119 |
+
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
120 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
121 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
122 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
123 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
124 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
125 |
+
"model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
126 |
+
"model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
127 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
128 |
+
"model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
129 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
130 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
131 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
132 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
133 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
134 |
+
"model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
135 |
+
"model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
136 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
137 |
+
"model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
138 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
139 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
140 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
141 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
142 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
143 |
+
"model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
144 |
+
"model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
145 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
146 |
+
"model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
147 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
148 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
149 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
150 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
151 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
152 |
+
"model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
153 |
+
"model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
154 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
155 |
+
"model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
156 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
157 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
158 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
159 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
160 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
161 |
+
"model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
162 |
+
"model.layers.24.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
163 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
164 |
+
"model.layers.24.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
165 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
166 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
167 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
168 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
169 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
170 |
+
"model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
171 |
+
"model.layers.25.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
172 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
173 |
+
"model.layers.25.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
174 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
175 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
176 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
177 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
178 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
179 |
+
"model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
180 |
+
"model.layers.26.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
181 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
182 |
+
"model.layers.26.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
183 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
184 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
185 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
186 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
187 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
188 |
+
"model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
189 |
+
"model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
190 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
191 |
+
"model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
192 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
193 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
194 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
195 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
196 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
197 |
+
"model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
198 |
+
"model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
199 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
200 |
+
"model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
201 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
202 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
203 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
204 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
205 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
206 |
+
"model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
207 |
+
"model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
208 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
209 |
+
"model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
210 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
211 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
212 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
213 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
214 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
215 |
+
"model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
216 |
+
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
217 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
218 |
+
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
219 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
220 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
221 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
222 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
223 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
224 |
+
"model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
225 |
+
"model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
226 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
227 |
+
"model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
228 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
229 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
230 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
231 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
232 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
233 |
+
"model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
234 |
+
"model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
235 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
236 |
+
"model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
237 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
238 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
239 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
240 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
241 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
242 |
+
"model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
243 |
+
"model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
244 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
245 |
+
"model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
246 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
247 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
248 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
249 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
250 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
251 |
+
"model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
252 |
+
"model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
253 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
254 |
+
"model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
255 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
256 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
257 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
258 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
259 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
260 |
+
"model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
261 |
+
"model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
262 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
263 |
+
"model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
264 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
265 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
266 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
267 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
268 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
269 |
+
"model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
270 |
+
"model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
271 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
272 |
+
"model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
273 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
274 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
275 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
276 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
277 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
278 |
+
"model.layers.36.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
279 |
+
"model.layers.36.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
280 |
+
"model.layers.36.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
281 |
+
"model.layers.36.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
282 |
+
"model.layers.36.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
283 |
+
"model.layers.36.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
284 |
+
"model.layers.36.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
285 |
+
"model.layers.36.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
286 |
+
"model.layers.36.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
287 |
+
"model.layers.37.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
288 |
+
"model.layers.37.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
289 |
+
"model.layers.37.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
290 |
+
"model.layers.37.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
291 |
+
"model.layers.37.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
292 |
+
"model.layers.37.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
293 |
+
"model.layers.37.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
294 |
+
"model.layers.37.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
295 |
+
"model.layers.37.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
296 |
+
"model.layers.38.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
297 |
+
"model.layers.38.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
298 |
+
"model.layers.38.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
299 |
+
"model.layers.38.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
300 |
+
"model.layers.38.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
301 |
+
"model.layers.38.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
302 |
+
"model.layers.38.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
303 |
+
"model.layers.38.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
304 |
+
"model.layers.38.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
305 |
+
"model.layers.39.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
306 |
+
"model.layers.39.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
307 |
+
"model.layers.39.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
308 |
+
"model.layers.39.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
309 |
+
"model.layers.39.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
310 |
+
"model.layers.39.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
311 |
+
"model.layers.39.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
312 |
+
"model.layers.39.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
313 |
+
"model.layers.39.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
314 |
+
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
315 |
+
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
316 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
317 |
+
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
318 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
319 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
320 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
321 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
322 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
323 |
+
"model.layers.40.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
324 |
+
"model.layers.40.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
325 |
+
"model.layers.40.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
326 |
+
"model.layers.40.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
327 |
+
"model.layers.40.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
328 |
+
"model.layers.40.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
329 |
+
"model.layers.40.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
330 |
+
"model.layers.40.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
331 |
+
"model.layers.40.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
332 |
+
"model.layers.41.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
333 |
+
"model.layers.41.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
334 |
+
"model.layers.41.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
335 |
+
"model.layers.41.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
336 |
+
"model.layers.41.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
337 |
+
"model.layers.41.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
338 |
+
"model.layers.41.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
339 |
+
"model.layers.41.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
340 |
+
"model.layers.41.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
341 |
+
"model.layers.42.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
342 |
+
"model.layers.42.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
343 |
+
"model.layers.42.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
344 |
+
"model.layers.42.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
345 |
+
"model.layers.42.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
346 |
+
"model.layers.42.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
347 |
+
"model.layers.42.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
348 |
+
"model.layers.42.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
349 |
+
"model.layers.42.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
350 |
+
"model.layers.43.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
351 |
+
"model.layers.43.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
352 |
+
"model.layers.43.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
353 |
+
"model.layers.43.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
354 |
+
"model.layers.43.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
355 |
+
"model.layers.43.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
356 |
+
"model.layers.43.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
357 |
+
"model.layers.43.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
358 |
+
"model.layers.43.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
359 |
+
"model.layers.44.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
360 |
+
"model.layers.44.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
361 |
+
"model.layers.44.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
362 |
+
"model.layers.44.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
363 |
+
"model.layers.44.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
364 |
+
"model.layers.44.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
365 |
+
"model.layers.44.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
366 |
+
"model.layers.44.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
367 |
+
"model.layers.44.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
368 |
+
"model.layers.45.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
369 |
+
"model.layers.45.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
370 |
+
"model.layers.45.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
371 |
+
"model.layers.45.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
372 |
+
"model.layers.45.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
373 |
+
"model.layers.45.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
374 |
+
"model.layers.45.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
375 |
+
"model.layers.45.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
376 |
+
"model.layers.45.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
377 |
+
"model.layers.46.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
378 |
+
"model.layers.46.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
379 |
+
"model.layers.46.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
380 |
+
"model.layers.46.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
381 |
+
"model.layers.46.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
382 |
+
"model.layers.46.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
383 |
+
"model.layers.46.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
384 |
+
"model.layers.46.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
385 |
+
"model.layers.46.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
386 |
+
"model.layers.47.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
387 |
+
"model.layers.47.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
388 |
+
"model.layers.47.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
389 |
+
"model.layers.47.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
390 |
+
"model.layers.47.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
391 |
+
"model.layers.47.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
392 |
+
"model.layers.47.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
393 |
+
"model.layers.47.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
394 |
+
"model.layers.47.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
395 |
+
"model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
396 |
+
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
397 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
398 |
+
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
399 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
400 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
401 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
402 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
403 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
404 |
+
"model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
405 |
+
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
406 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
407 |
+
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
408 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
409 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
410 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
411 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
412 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
413 |
+
"model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
414 |
+
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
415 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
416 |
+
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
417 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
418 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
419 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
420 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
421 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
422 |
+
"model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
423 |
+
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
424 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
425 |
+
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
426 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
427 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
428 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
429 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
430 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
431 |
+
"model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
432 |
+
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
433 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
434 |
+
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
435 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
436 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
437 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
438 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
439 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
440 |
+
"model.norm.weight": "model-00002-of-00002.safetensors"
|
441 |
+
}
|
442 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "<|im_start|>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
}
|
10 |
+
],
|
11 |
+
"bos_token": {
|
12 |
+
"content": "<|startoftext|>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false
|
17 |
+
},
|
18 |
+
"eos_token": {
|
19 |
+
"content": "<|im_end|>",
|
20 |
+
"lstrip": false,
|
21 |
+
"normalized": false,
|
22 |
+
"rstrip": false,
|
23 |
+
"single_word": false
|
24 |
+
},
|
25 |
+
"pad_token": {
|
26 |
+
"content": "<unk>",
|
27 |
+
"lstrip": false,
|
28 |
+
"normalized": false,
|
29 |
+
"rstrip": false,
|
30 |
+
"single_word": false
|
31 |
+
},
|
32 |
+
"unk_token": {
|
33 |
+
"content": "<unk>",
|
34 |
+
"lstrip": false,
|
35 |
+
"normalized": false,
|
36 |
+
"rstrip": false,
|
37 |
+
"single_word": false
|
38 |
+
}
|
39 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<|startoftext|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "<|endoftext|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"6": {
|
30 |
+
"content": "<|im_start|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"7": {
|
38 |
+
"content": "<|im_end|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
}
|
45 |
+
},
|
46 |
+
"additional_special_tokens": [
|
47 |
+
"<|im_start|>"
|
48 |
+
],
|
49 |
+
"bos_token": "<|startoftext|>",
|
50 |
+
"chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\\n' + system_message + '<|im_end|>\\n' }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
|
51 |
+
"clean_up_tokenization_spaces": false,
|
52 |
+
"eos_token": "<|im_end|>",
|
53 |
+
"legacy": true,
|
54 |
+
"model_max_length": 4096,
|
55 |
+
"pad_token": "<unk>",
|
56 |
+
"padding_side": "right",
|
57 |
+
"sp_model_kwargs": {},
|
58 |
+
"split_special_tokens": false,
|
59 |
+
"tokenizer_class": "LlamaTokenizer",
|
60 |
+
"unk_token": "<unk>",
|
61 |
+
"use_default_system_prompt": false
|
62 |
+
}
|