zezeze97 committed on
Commit d3d4ef5
1 Parent(s): 9b69c72

first commit
.gitattributes CHANGED
@@ -33,3 +33,18 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model-00005-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00006-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00007-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00008-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00013-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00002-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00003-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00011-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00012-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00014-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00015-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00001-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00004-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00009-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
+ model-00010-of-00015.safetensors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,232 @@
+ ---
+ license: apache-2.0
+ ---
+ # Reasoning Model Based on FormalGeo7K
+
+ ## Quick Start
+ Before running the scripts, install the required dependencies listed below.
+
+ ```shell
+ pip install --upgrade pip
+ pip install torch transformers==4.40.0
+ pip install sentencepiece protobuf
+ pip install accelerate pillow
+ pip install ninja
+ pip install packaging
+ pip install flash-attn --no-build-isolation
+ ```
+
+ ```python
+ import torch
+ import transformers
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from PIL import Image
+ import warnings
+ import numpy as np
+
+ # set device
+ device = 'cuda'  # or 'cpu'
+ torch.set_default_device(device)
+
+ # create model
+ model = AutoModelForCausalLM.from_pretrained(
+     'NaughtyDog97/FormalEnhencedGPS-34B',
+     torch_dtype=torch.float16,  # float32 for cpu
+     device_map='auto',
+     trust_remote_code=True)
+ tokenizer = AutoTokenizer.from_pretrained(
+     'NaughtyDog97/FormalEnhencedGPS-34B',
+     trust_remote_code=True)
+
+ # text prompt
+ img_path = 'sample/4927.png'
+ qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
+ prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
+ text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
+ text_chunks = [tokenizer(chunk).input_ids for chunk in text.split('<image>')]
+ input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
+
+ # image; sample images can be found in the images folder
+ image = Image.open(img_path).convert('RGB')
+
+ image_tensor = model.process_images([image], model.config).to(dtype=model.dtype, device=device)
+
+ # generate
+ with torch.inference_mode():
+     output_ids = model.generate(
+         input_ids,
+         images=image_tensor,
+         do_sample=False,
+         temperature=None,
+         top_p=None,
+         top_k=None,
+         num_beams=1,
+         max_new_tokens=3500,
+         eos_token_id=tokenizer.eos_token_id,
+         repetition_penalty=None,
+         use_cache=True
+     )[0]
+
+ response = tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
+ print(response)
+ ```
+
+ Our model supports the following three solving modes:
+ ```python
+ # Q => Predicted CDL + CoT Answer
+ prompt = f'Using the provided geometric image and question, first predict the construction_cdl and image_cdl. Then, give a detailed step-by-step solution.\nThe question is:\n{qs}'
+
+ # Q + Predicted CDL => CoT Answer
+ prompt = f'Using the provided geometric image, construction_cdl, image_cdl, and question, give a detailed step-by-step solution. Note that there may be minor errors in the construction_cdl and image_cdl.\nThe construction_cdl is:\n{predict_consCDL}\nThe image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
+
+ # Q + Predicted CDL => Calibrated CDL + CoT Answer
+ prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
+ ```
+
92
+ ## 结合Formalization模型的推理
93
+ ```python
94
+ import torch
95
+ import transformers
96
+ from transformers import AutoModelForCausalLM, AutoTokenizer
97
+ from PIL import Image
98
+ import warnings
99
+ import numpy as np
100
+ import re
101
+
102
+
103
+ def parse_cdl(input_string):
104
+ # 使用正则表达式查找各个部分
105
+ patterns = {
106
+ 'construction_cdl': r'(?:The )?(?:calibrate )?construction_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
107
+ 'image_cdl': r'(?:The )?(?:calibrate )?image_cdl(?: is)?:\n(.*?)(?=\n(?:The )?(?:calibrate )?\w+_cdl is:|\n(?:The )?(?:calibrate )?\w+_cdl:|\nSolution is:|\Z)',
108
+ 'text_cdl': r'(?:The )?text_cdl(?: is)?:\n(.*?)(?=\n(?:The )?\w+_cdl is:|\n(?:The )?\w+_cdl:|\nSolution is:|\Z)',
109
+ 'goal_cdl': r'(?:The )?goal_cdl(?: is)?:\n(.*?)(?=\n(?:The )?\w+_cdl is:|\n(?:The )?\w+_cdl:|\nSolution is:|\Z)'
110
+ }
111
+
112
+ results = {}
113
+
114
+ # 优先匹配包含"calibrate"的版本
115
+ for key, pattern in patterns.items():
116
+ pattern = pattern.replace("(?:calibrate )?", "(?:calibrate )")
117
+ match = re.search(pattern, input_string, re.DOTALL)
118
+ if match:
119
+ results[key] = match.group(1).strip()
120
+ else:
121
+ # 如果未找到包含"calibrate"的版本,尝试匹配不含"calibrate"的版本
122
+ pattern = pattern.replace("(?:calibrate )", "(?:calibrate )?")
123
+ match = re.search(pattern, input_string, re.DOTALL)
124
+ if match:
125
+ results[key] = match.group(1).strip()
126
+
127
+ return results
128
+
129
+
130
+ # set device
131
+ device = 'cuda' # or cpu
132
+ torch.set_default_device(device)
133
+
134
+ # create model
135
+ formalization_model = AutoModelForCausalLM.from_pretrained(
136
+ 'NaughtyDog97/GeoFormalizer',
137
+ torch_dtype=torch.float16, # float32 for cpu
138
+ device_map='auto',
139
+ trust_remote_code=True)
140
+
141
+ formalization_tokenizer = AutoTokenizer.from_pretrained(
142
+ 'NaughtyDog97/GeoFormalizer',
143
+ trust_remote_code=True)
144
+
145
+
146
+ reason_model = AutoModelForCausalLM.from_pretrained(
147
+ 'NaughtyDog97/FormalEnhencedGPS-34B',
148
+ torch_dtype=torch.float16, # float32 for cpu
149
+ device_map='auto',
150
+ trust_remote_code=True)
151
+ reason_tokenizer = AutoTokenizer.from_pretrained(
152
+ 'NaughtyDog97/FormalEnhencedGPS-34B',
153
+ trust_remote_code=True)
154
+
155
+
156
+
157
+ img_path = 'sample/4927.png'
158
+ image = Image.open(img_path).convert('RGB')
159
+
160
+
161
+ # formalization
162
+ prompt = 'Based on the image, first describe what you see in the figure, then predict the construction_cdl and image_cdl and calibrate it.'
163
+ text = f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
164
+ text_chunks = [formalization_tokenizer(chunk).input_ids for chunk in text.split('<image>')]
165
+ input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
166
+
167
+ # generate
168
+ image_tensor = formalization_model.process_images([image], formalization_model.config).to(dtype=formalization_model.dtype, device=device)
169
+ with torch.inference_mode():
170
+ output_ids = formalization_model.generate(
171
+ input_ids,
172
+ images=image_tensor,
173
+ do_sample=False,
174
+ temperature=None,
175
+ top_p=None,
176
+ top_k=None,
177
+ num_beams=1,
178
+ max_new_tokens=3500,
179
+ eos_token_id=formalization_tokenizer.eos_token_id,
180
+ repetition_penalty=None,
181
+ use_cache=True
182
+ )[0]
183
+
184
+
185
+ respones = formalization_tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
186
+ print(f'Formalization result is\n{respones}')
187
+ cdl_info = parse_cdl(respones)
188
+ predict_consCDL = cdl_info['construction_cdl']
189
+ predict_imgCDL = cdl_info['image_cdl']
190
+
191
+
192
+
193
+ # reasoning
194
+
195
+ qs = 'As shown in the diagram, AE/AB=1/4, M is the midpoint of segment AC, BE is parallel to CP, EA is parallel to CP. Find the ratio of the length of line BC to the length of line CD.'
196
+ prompt = f'Using the provided geometric image and the possibly erroneous construction_cdl and image_cdl, first calibrate the construction_cdl and image_cdl, then give a detailed step-by-step solution to the question.\nThe initial construction_cdl is:\n{predict_consCDL}\nThe initial image_cdl is:\n{predict_imgCDL}\nThe question is:\n{qs}'
197
+ text = f'<|im_start|>user\n<image>\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
198
+ text_chunks = [reason_tokenizer(chunk).input_ids for chunk in text.split('<image>')]
199
+ input_ids = torch.tensor(text_chunks[0] + [-200] + text_chunks[1][1:], dtype=torch.long).unsqueeze(0).to(device)
200
+
201
+
202
+
203
+ # generate
204
+ image_tensor = reason_model.process_images([image], reason_model.config).to(dtype=reason_model.dtype, device=device)
205
+ with torch.inference_mode():
206
+ output_ids = reason_model.generate(
207
+ input_ids,
208
+ images=image_tensor,
209
+ do_sample=False,
210
+ temperature=None,
211
+ top_p=None,
212
+ top_k=None,
213
+ num_beams=1,
214
+ max_new_tokens=3500,
215
+ eos_token_id=reason_tokenizer.eos_token_id,
216
+ repetition_penalty=None,
217
+ use_cache=True
218
+ )[0]
219
+
220
+ respones = reason_tokenizer.decode(output_ids[input_ids.shape[1]:], skip_special_tokens=True).strip()
221
+ print(f'Reasoning steps is\n{respones}')
222
+
223
+
224
+
225
+ ```
226
+
227
+
228
+
229
+ ## Performance
230
+ | | Q => Predicted CDL + CoT Answer | Q + Predicted CDL => CoT Answer | Q + Predicted CDL => Calibrated CDL + CoT Answer |
231
+ |-----|-------------------------------------|--------------------------------------|------------------------------------------------------|
232
+ | siglip-0.4B-yi1.5-9B | 71.84/80.58 | 72.17/81.72 | 72.33/81.72 |
config.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "_name_or_path": "NaughtyDog97/FormalEnhencedGPS-34B",
+   "architectures": [
+     "FegeoLlamaForCausalLM"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_fegeo_llama.FEGeoLlamaConfig",
+     "AutoModelForCausalLM": "modeling_fegeo_llama.FEGeoLlamaForCausalLM"
+   },
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 7,
+   "freeze_mm_mlp_adapter": false,
+   "freeze_vision_tower": true,
+   "hidden_act": "silu",
+   "hidden_size": 7168,
+   "image_aspect_ratio": "pad",
+   "initializer_range": 0.02,
+   "intermediate_size": 20480,
+   "max_position_embeddings": 4096,
+   "mm_hidden_size": 1152,
+   "mm_projector_lr": 1e-05,
+   "mm_projector_type": "mlp2x_gelu",
+   "mm_vision_tower": "google/siglip-so400m-patch14-384",
+   "model_type": "fegeo-llama",
+   "num_attention_heads": 56,
+   "num_hidden_layers": 60,
+   "num_key_value_heads": 8,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 5000000.0,
+   "tie_word_embeddings": false,
+   "tokenizer_model_max_length": 4096,
+   "tokenizer_padding_side": "right",
+   "torch_dtype": "float16",
+   "transformers_version": "4.40.0",
+   "tune_mm_mlp_adapter": false,
+   "tune_vision_tower": false,
+   "use_cache": true,
+   "use_mm_proj": true,
+   "use_s2": false,
+   "vocab_size": 64000
+ }
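As a quick sanity check (a sketch, not part of the commit), the custom config above can be loaded through `AutoConfig` with `trust_remote_code=True`, which resolves `auto_map` to the `FEGeoLlamaConfig` class shipped in this repo.

```python
# Minimal sketch: load the custom config and inspect a few of the fields recorded above.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained('NaughtyDog97/FormalEnhencedGPS-34B', trust_remote_code=True)
print(cfg.model_type)       # fegeo-llama
print(cfg.hidden_size)      # 7168
print(cfg.mm_vision_tower)  # google/siglip-so400m-patch14-384
```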
configuration_fegeo_llama copy.py ADDED
@@ -0,0 +1,253 @@
1
+ # coding=utf-8
2
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
5
+ # and OPT implementations in this library. It has been modified from its
6
+ # original forms to accommodate minor architectural differences compared
7
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+ """ LLaMA model configuration"""
21
+
22
+ from transformers.configuration_utils import PretrainedConfig
23
+ from transformers.utils import logging
24
+
25
+
26
+ logger = logging.get_logger(__name__)
27
+
28
+
29
+ from transformers.models.deprecated._archive_maps import LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP # noqa: F401, E402
30
+
31
+
32
+ class LlamaConfig(PretrainedConfig):
33
+ r"""
34
+ This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
35
+ model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
36
+ defaults will yield a similar configuration to that of the LLaMA-7B.
37
+
38
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
39
+ documentation from [`PretrainedConfig`] for more information.
40
+
41
+
42
+ Args:
43
+ vocab_size (`int`, *optional*, defaults to 32000):
44
+ Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
45
+ `inputs_ids` passed when calling [`LlamaModel`]
46
+ hidden_size (`int`, *optional*, defaults to 4096):
47
+ Dimension of the hidden representations.
48
+ intermediate_size (`int`, *optional*, defaults to 11008):
49
+ Dimension of the MLP representations.
50
+ num_hidden_layers (`int`, *optional*, defaults to 32):
51
+ Number of hidden layers in the Transformer decoder.
52
+ num_attention_heads (`int`, *optional*, defaults to 32):
53
+ Number of attention heads for each attention layer in the Transformer decoder.
54
+ num_key_value_heads (`int`, *optional*):
55
+ This is the number of key_value heads that should be used to implement Grouped Query Attention. If
56
+ `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
57
+ `num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
58
+ converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
59
+ by meanpooling all the original heads within that group. For more details checkout [this
60
+ paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
61
+ `num_attention_heads`.
62
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
63
+ The non-linear activation function (function or string) in the decoder.
64
+ max_position_embeddings (`int`, *optional*, defaults to 2048):
65
+ The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens,
66
+ Llama 2 up to 4096, CodeLlama up to 16384.
67
+ initializer_range (`float`, *optional*, defaults to 0.02):
68
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
69
+ rms_norm_eps (`float`, *optional*, defaults to 1e-06):
70
+ The epsilon used by the rms normalization layers.
71
+ use_cache (`bool`, *optional*, defaults to `True`):
72
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
73
+ relevant if `config.is_decoder=True`.
74
+ pad_token_id (`int`, *optional*):
75
+ Padding token id.
76
+ bos_token_id (`int`, *optional*, defaults to 1):
77
+ Beginning of stream token id.
78
+ eos_token_id (`int`, *optional*, defaults to 2):
79
+ End of stream token id.
80
+ pretraining_tp (`int`, *optional*, defaults to 1):
81
+ Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
82
+ document](https://huggingface.co/docs/transformers/main/perf_train_gpu_many#tensor-parallelism) to understand more about it. This value is
83
+ necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
84
+ issue](https://github.com/pytorch/pytorch/issues/76232).
85
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
86
+ Whether to tie weight embeddings
87
+ rope_theta (`float`, *optional*, defaults to 10000.0):
88
+ The base period of the RoPE embeddings.
89
+ rope_scaling (`Dict`, *optional*):
90
+ Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
91
+ strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
92
+ `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
93
+ `max_position_embeddings` to the expected new maximum. See the following thread for more information on how
94
+ these scaling strategies behave:
95
+ https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
96
+ experimental feature, subject to breaking API changes in future versions.
97
+ attention_bias (`bool`, *optional*, defaults to `False`):
98
+ Whether to use a bias in the query, key, value and output projection layers during self-attention.
99
+ attention_dropout (`float`, *optional*, defaults to 0.0):
100
+ The dropout ratio for the attention probabilities.
101
+
102
+ ```python
103
+ >>> from transformers import LlamaModel, LlamaConfig
104
+
105
+ >>> # Initializing a LLaMA llama-7b style configuration
106
+ >>> configuration = LlamaConfig()
107
+
108
+ >>> # Initializing a model from the llama-7b style configuration
109
+ >>> model = LlamaModel(configuration)
110
+
111
+ >>> # Accessing the model configuration
112
+ >>> configuration = model.config
113
+ ```"""
114
+
115
+ model_type = "llama"
116
+ keys_to_ignore_at_inference = ["past_key_values"]
117
+
118
+ def __init__(
119
+ self,
120
+ vocab_size=32000,
121
+ hidden_size=4096,
122
+ intermediate_size=11008,
123
+ num_hidden_layers=32,
124
+ num_attention_heads=32,
125
+ num_key_value_heads=None,
126
+ hidden_act="silu",
127
+ max_position_embeddings=2048,
128
+ initializer_range=0.02,
129
+ rms_norm_eps=1e-6,
130
+ use_cache=True,
131
+ pad_token_id=None,
132
+ bos_token_id=1,
133
+ eos_token_id=2,
134
+ pretraining_tp=1,
135
+ tie_word_embeddings=False,
136
+ rope_theta=10000.0,
137
+ rope_scaling=None,
138
+ attention_bias=False,
139
+ attention_dropout=0.0,
140
+ **kwargs,
141
+ ):
142
+ self.vocab_size = vocab_size
143
+ self.max_position_embeddings = max_position_embeddings
144
+ self.hidden_size = hidden_size
145
+ self.intermediate_size = intermediate_size
146
+ self.num_hidden_layers = num_hidden_layers
147
+ self.num_attention_heads = num_attention_heads
148
+
149
+ # for backward compatibility
150
+ if num_key_value_heads is None:
151
+ num_key_value_heads = num_attention_heads
152
+
153
+ self.num_key_value_heads = num_key_value_heads
154
+ self.hidden_act = hidden_act
155
+ self.initializer_range = initializer_range
156
+ self.rms_norm_eps = rms_norm_eps
157
+ self.pretraining_tp = pretraining_tp
158
+ self.use_cache = use_cache
159
+ self.rope_theta = rope_theta
160
+ self.rope_scaling = rope_scaling
161
+ self._rope_scaling_validation()
162
+ self.attention_bias = attention_bias
163
+ self.attention_dropout = attention_dropout
164
+
165
+ super().__init__(
166
+ pad_token_id=pad_token_id,
167
+ bos_token_id=bos_token_id,
168
+ eos_token_id=eos_token_id,
169
+ tie_word_embeddings=tie_word_embeddings,
170
+ **kwargs,
171
+ )
172
+
173
+ def _rope_scaling_validation(self):
174
+ """
175
+ Validate the `rope_scaling` configuration.
176
+ """
177
+ if self.rope_scaling is None:
178
+ return
179
+
180
+ if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
181
+ raise ValueError(
182
+ "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, " f"got {self.rope_scaling}"
183
+ )
184
+ rope_scaling_type = self.rope_scaling.get("type", None)
185
+ rope_scaling_factor = self.rope_scaling.get("factor", None)
186
+ if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
187
+ raise ValueError(
188
+ f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
189
+ )
190
+ if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
191
+ raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
192
+
193
+
194
+
195
+ """Vision model configuration"""
196
+ from typing import Union
197
+ from transformers import PretrainedConfig
198
+ import os
199
+
200
+
201
+ class SigLipVisionConfig(PretrainedConfig):
202
+ model_type = "siglip_vision_model"
203
+
204
+ def __init__(
205
+ self,
206
+ hidden_size=1152,
207
+ image_mean=(0.5, 0.5, 0.5),
208
+ intermediate_size=4304,
209
+ num_hidden_layers=27,
210
+ num_attention_heads=16,
211
+ num_channels=3,
212
+ image_size=384,
213
+ patch_size=14,
214
+ hidden_act="gelu_pytorch_tanh",
215
+ layer_norm_eps=1e-6,
216
+ attention_dropout=0.0,
217
+ **kwargs,
218
+ ):
219
+ super().__init__(**kwargs)
220
+
221
+ self.hidden_size = hidden_size
222
+ self.intermediate_size = intermediate_size
223
+ self.num_hidden_layers = num_hidden_layers
224
+ self.num_attention_heads = num_attention_heads
225
+ self.num_channels = num_channels
226
+ self.patch_size = patch_size
227
+ self.image_size = image_size
228
+ self.attention_dropout = attention_dropout
229
+ self.layer_norm_eps = layer_norm_eps
230
+ self.hidden_act = hidden_act
231
+ self.image_mean = image_mean
232
+
233
+ @classmethod
234
+ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
235
+ cls._set_token_in_kwargs(kwargs)
236
+
237
+ config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
238
+
239
+ # get the vision config dict if we are loading from SigLipConfig
240
+ if config_dict.get("model_type") == "siglip":
241
+ config_dict = config_dict["vision_config"]
242
+
243
+ if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
244
+ logger.warning(
245
+ f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
246
+ f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
247
+ )
248
+
249
+ return cls.from_dict(config_dict, **kwargs)
250
+
251
+
252
+ class FEGeoLlamaConfig(LlamaConfig):
253
+ model_type = "fegeo-llama"
configuration_fegeo_llama.py ADDED
@@ -0,0 +1,253 @@
1
+ # coding=utf-8
2
+ # Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
3
+ #
4
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
5
+ # and OPT implementations in this library. It has been modified from its
6
+ # original forms to accommodate minor architectural differences compared
7
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
8
+ #
9
+ # Licensed under the Apache License, Version 2.0 (the "License");
10
+ # you may not use this file except in compliance with the License.
11
+ # You may obtain a copy of the License at
12
+ #
13
+ # http://www.apache.org/licenses/LICENSE-2.0
14
+ #
15
+ # Unless required by applicable law or agreed to in writing, software
16
+ # distributed under the License is distributed on an "AS IS" BASIS,
17
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ # See the License for the specific language governing permissions and
19
+ # limitations under the License.
20
+ """ LLaMA model configuration"""
21
+
22
+ from transformers.configuration_utils import PretrainedConfig
23
+ from transformers.utils import logging
24
+
25
+
26
+ logger = logging.get_logger(__name__)
27
+
28
+
29
+ from transformers.models.deprecated._archive_maps import LLAMA_PRETRAINED_CONFIG_ARCHIVE_MAP # noqa: F401, E402
30
+
31
+
32
+ class LlamaConfig(PretrainedConfig):
33
+ r"""
34
+ This is the configuration class to store the configuration of a [`LlamaModel`]. It is used to instantiate an LLaMA
35
+ model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
36
+ defaults will yield a similar configuration to that of the LLaMA-7B.
37
+
38
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
39
+ documentation from [`PretrainedConfig`] for more information.
40
+
41
+
42
+ Args:
43
+ vocab_size (`int`, *optional*, defaults to 32000):
44
+ Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
45
+ `inputs_ids` passed when calling [`LlamaModel`]
46
+ hidden_size (`int`, *optional*, defaults to 4096):
47
+ Dimension of the hidden representations.
48
+ intermediate_size (`int`, *optional*, defaults to 11008):
49
+ Dimension of the MLP representations.
50
+ num_hidden_layers (`int`, *optional*, defaults to 32):
51
+ Number of hidden layers in the Transformer decoder.
52
+ num_attention_heads (`int`, *optional*, defaults to 32):
53
+ Number of attention heads for each attention layer in the Transformer decoder.
54
+ num_key_value_heads (`int`, *optional*):
55
+ This is the number of key_value heads that should be used to implement Grouped Query Attention. If
56
+ `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
57
+ `num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
58
+ converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
59
+ by meanpooling all the original heads within that group. For more details checkout [this
60
+ paper](https://arxiv.org/pdf/2305.13245.pdf). If it is not specified, will default to
61
+ `num_attention_heads`.
62
+ hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
63
+ The non-linear activation function (function or string) in the decoder.
64
+ max_position_embeddings (`int`, *optional*, defaults to 2048):
65
+ The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens,
66
+ Llama 2 up to 4096, CodeLlama up to 16384.
67
+ initializer_range (`float`, *optional*, defaults to 0.02):
68
+ The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
69
+ rms_norm_eps (`float`, *optional*, defaults to 1e-06):
70
+ The epsilon used by the rms normalization layers.
71
+ use_cache (`bool`, *optional*, defaults to `True`):
72
+ Whether or not the model should return the last key/values attentions (not used by all models). Only
73
+ relevant if `config.is_decoder=True`.
74
+ pad_token_id (`int`, *optional*):
75
+ Padding token id.
76
+ bos_token_id (`int`, *optional*, defaults to 1):
77
+ Beginning of stream token id.
78
+ eos_token_id (`int`, *optional*, defaults to 2):
79
+ End of stream token id.
80
+ pretraining_tp (`int`, *optional*, defaults to 1):
81
+ Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
82
+ document](https://huggingface.co/docs/transformers/main/perf_train_gpu_many#tensor-parallelism) to understand more about it. This value is
83
+ necessary to ensure exact reproducibility of the pretraining results. Please refer to [this
84
+ issue](https://github.com/pytorch/pytorch/issues/76232).
85
+ tie_word_embeddings (`bool`, *optional*, defaults to `False`):
86
+ Whether to tie weight embeddings
87
+ rope_theta (`float`, *optional*, defaults to 10000.0):
88
+ The base period of the RoPE embeddings.
89
+ rope_scaling (`Dict`, *optional*):
90
+ Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
91
+ strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format is
92
+ `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
93
+ `max_position_embeddings` to the expected new maximum. See the following thread for more information on how
94
+ these scaling strategies behave:
95
+ https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is an
96
+ experimental feature, subject to breaking API changes in future versions.
97
+ attention_bias (`bool`, *optional*, defaults to `False`):
98
+ Whether to use a bias in the query, key, value and output projection layers during self-attention.
99
+ attention_dropout (`float`, *optional*, defaults to 0.0):
100
+ The dropout ratio for the attention probabilities.
101
+
102
+ ```python
103
+ >>> from transformers import LlamaModel, LlamaConfig
104
+
105
+ >>> # Initializing a LLaMA llama-7b style configuration
106
+ >>> configuration = LlamaConfig()
107
+
108
+ >>> # Initializing a model from the llama-7b style configuration
109
+ >>> model = LlamaModel(configuration)
110
+
111
+ >>> # Accessing the model configuration
112
+ >>> configuration = model.config
113
+ ```"""
114
+
115
+ model_type = "llama"
116
+ keys_to_ignore_at_inference = ["past_key_values"]
117
+
118
+ def __init__(
119
+ self,
120
+ vocab_size=32000,
121
+ hidden_size=4096,
122
+ intermediate_size=11008,
123
+ num_hidden_layers=32,
124
+ num_attention_heads=32,
125
+ num_key_value_heads=None,
126
+ hidden_act="silu",
127
+ max_position_embeddings=2048,
128
+ initializer_range=0.02,
129
+ rms_norm_eps=1e-6,
130
+ use_cache=True,
131
+ pad_token_id=None,
132
+ bos_token_id=1,
133
+ eos_token_id=2,
134
+ pretraining_tp=1,
135
+ tie_word_embeddings=False,
136
+ rope_theta=10000.0,
137
+ rope_scaling=None,
138
+ attention_bias=False,
139
+ attention_dropout=0.0,
140
+ **kwargs,
141
+ ):
142
+ self.vocab_size = vocab_size
143
+ self.max_position_embeddings = max_position_embeddings
144
+ self.hidden_size = hidden_size
145
+ self.intermediate_size = intermediate_size
146
+ self.num_hidden_layers = num_hidden_layers
147
+ self.num_attention_heads = num_attention_heads
148
+
149
+ # for backward compatibility
150
+ if num_key_value_heads is None:
151
+ num_key_value_heads = num_attention_heads
152
+
153
+ self.num_key_value_heads = num_key_value_heads
154
+ self.hidden_act = hidden_act
155
+ self.initializer_range = initializer_range
156
+ self.rms_norm_eps = rms_norm_eps
157
+ self.pretraining_tp = pretraining_tp
158
+ self.use_cache = use_cache
159
+ self.rope_theta = rope_theta
160
+ self.rope_scaling = rope_scaling
161
+ self._rope_scaling_validation()
162
+ self.attention_bias = attention_bias
163
+ self.attention_dropout = attention_dropout
164
+
165
+ super().__init__(
166
+ pad_token_id=pad_token_id,
167
+ bos_token_id=bos_token_id,
168
+ eos_token_id=eos_token_id,
169
+ tie_word_embeddings=tie_word_embeddings,
170
+ **kwargs,
171
+ )
172
+
173
+ def _rope_scaling_validation(self):
174
+ """
175
+ Validate the `rope_scaling` configuration.
176
+ """
177
+ if self.rope_scaling is None:
178
+ return
179
+
180
+ if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
181
+ raise ValueError(
182
+ "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, " f"got {self.rope_scaling}"
183
+ )
184
+ rope_scaling_type = self.rope_scaling.get("type", None)
185
+ rope_scaling_factor = self.rope_scaling.get("factor", None)
186
+ if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
187
+ raise ValueError(
188
+ f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
189
+ )
190
+ if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
191
+ raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
192
+
193
+
194
+
195
+ """Vision model configuration"""
196
+ from typing import Union
197
+ from transformers import PretrainedConfig
198
+ import os
199
+
200
+
201
+ class SigLipVisionConfig(PretrainedConfig):
202
+ model_type = "siglip_vision_model"
203
+
204
+ def __init__(
205
+ self,
206
+ hidden_size=1152,
207
+ image_mean=(0.5, 0.5, 0.5),
208
+ intermediate_size=4304,
209
+ num_hidden_layers=27,
210
+ num_attention_heads=16,
211
+ num_channels=3,
212
+ image_size=384,
213
+ patch_size=14,
214
+ hidden_act="gelu_pytorch_tanh",
215
+ layer_norm_eps=1e-6,
216
+ attention_dropout=0.0,
217
+ **kwargs,
218
+ ):
219
+ super().__init__(**kwargs)
220
+
221
+ self.hidden_size = hidden_size
222
+ self.intermediate_size = intermediate_size
223
+ self.num_hidden_layers = num_hidden_layers
224
+ self.num_attention_heads = num_attention_heads
225
+ self.num_channels = num_channels
226
+ self.patch_size = patch_size
227
+ self.image_size = image_size
228
+ self.attention_dropout = attention_dropout
229
+ self.layer_norm_eps = layer_norm_eps
230
+ self.hidden_act = hidden_act
231
+ self.image_mean = image_mean
232
+
233
+ @classmethod
234
+ def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> "PretrainedConfig":
235
+ cls._set_token_in_kwargs(kwargs)
236
+
237
+ config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
238
+
239
+ # get the vision config dict if we are loading from SigLipConfig
240
+ if config_dict.get("model_type") == "siglip":
241
+ config_dict = config_dict["vision_config"]
242
+
243
+ if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
244
+ logger.warning(
245
+ f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
246
+ f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
247
+ )
248
+
249
+ return cls.from_dict(config_dict, **kwargs)
250
+
251
+
252
+ class FEGeoLlamaConfig(LlamaConfig):
253
+ model_type = "fegeo-llama"
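For reference, `FEGeoLlamaConfig` can also be constructed directly from this module. The snippet below is a hedged sketch (assuming the file has been downloaded locally and is importable) that plugs in the values recorded in config.json above.

```python
# Minimal sketch: build the config in-process from the values listed in config.json.
from configuration_fegeo_llama import FEGeoLlamaConfig

cfg = FEGeoLlamaConfig(
    vocab_size=64000,
    hidden_size=7168,
    intermediate_size=20480,
    num_hidden_layers=60,
    num_attention_heads=56,
    num_key_value_heads=8,
    max_position_embeddings=4096,
    rope_theta=5000000.0,
)
print(cfg.model_type)  # fegeo-llama
```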
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.40.0"
+ }
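These generation defaults can be read back explicitly with `GenerationConfig.from_pretrained` (a sketch, assuming Hub access; not part of the commit):

```python
# Minimal sketch: load the generation defaults shown above.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained('NaughtyDog97/FormalEnhencedGPS-34B')
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id, gen_cfg.pad_token_id)  # 1 2 0
```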
model-00001-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5decceca50307048127194d9b95acffce0b0784b510bfccd93325289c6779be
+ size 4793130720
model-00002-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c20a6f04df64f100647f12ceb99ed612a8d5995937a97ed911752b8cc0f3f30
+ size 4756459680
model-00003-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf40e7af12d2f10d640f6f6e160fb0dfb4f3ace50a8a41d22cb88c06f229f30b
+ size 4991370096
model-00004-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d198079c0d91b470c3dad829592e3b1a0d434075e53c1903e23ac17bf313a11c
+ size 4756459720
model-00005-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8bff0d7c60d2f0fc2a75c6fbe2bca8a99c700b04e403a511b85e9579a05e967a
+ size 4756459720
model-00006-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f4d13a0c8e57994cae6eec530590f25c1437bfeda4d77527e2cd0ef796d6a06
+ size 4991370120
model-00007-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00effa11c3d9790f9ea867c94c33c1c672b09c86ff71800bf2b5db1525814125
+ size 4756459720
model-00008-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a0eca81f81c112ba61db98518fef9762652f193e011e418f5aaf01a824868f6
+ size 4756459720
model-00009-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95c4188a61a682d1eabdef6f0a06a0b207c4675652a4071e91d985939cdd673f
+ size 4991370120
model-00010-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c48ac908415b3ae9d3673b85496f8eb785db4eb18a7298f6d28c81bd53d31e82
+ size 4756459720
model-00011-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:190e3fe70a791d0731b7e4fe9ec2df4b3e5d8b1ef27040ba7ecd85fd0a85681c
+ size 4756459720
model-00012-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b9321e106c62225675427a969d533e2d54b58309bafb57886c91778a179eff5
+ size 4991370120
model-00013-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b83eaaa1b916d2be90004d626856b1509e883ac5406721f33fd0357f4fd017b0
+ size 4756459720
model-00014-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58903530baf34db3c3dc70f385ea78c63419f2edb745489694488b98ee1a4d04
+ size 4756459720
model-00015-of-00015.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:672826baae4ead021e379ee874dfb9c213211e7b3ca47f75936bc6720382d13f
+ size 2126014672
model.safetensors.index.json ADDED
@@ -0,0 +1,975 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 69692634176
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00015-of-00015.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00015.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00003-of-00015.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00003-of-00015.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00004-of-00015.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00015.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00004-of-00015.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00004-of-00015.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00005-of-00015.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00005-of-00015.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00005-of-00015.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00005-of-00015.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00015.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00006-of-00015.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00006-of-00015.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00006-of-00015.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00006-of-00015.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00006-of-00015.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00007-of-00015.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00007-of-00015.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00007-of-00015.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00007-of-00015.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00008-of-00015.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00015.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00008-of-00015.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00008-of-00015.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
242
+ "model.layers.32.input_layernorm.weight": "model-00008-of-00015.safetensors",
243
+ "model.layers.32.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
244
+ "model.layers.32.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
245
+ "model.layers.32.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
246
+ "model.layers.32.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
247
+ "model.layers.32.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
248
+ "model.layers.32.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
249
+ "model.layers.32.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
250
+ "model.layers.32.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
251
+ "model.layers.33.input_layernorm.weight": "model-00009-of-00015.safetensors",
252
+ "model.layers.33.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
253
+ "model.layers.33.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
254
+ "model.layers.33.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
255
+ "model.layers.33.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
256
+ "model.layers.33.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
257
+ "model.layers.33.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
258
+ "model.layers.33.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
259
+ "model.layers.33.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
260
+ "model.layers.34.input_layernorm.weight": "model-00009-of-00015.safetensors",
261
+ "model.layers.34.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
262
+ "model.layers.34.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
263
+ "model.layers.34.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
264
+ "model.layers.34.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
265
+ "model.layers.34.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
266
+ "model.layers.34.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
267
+ "model.layers.34.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
268
+ "model.layers.34.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
269
+ "model.layers.35.input_layernorm.weight": "model-00009-of-00015.safetensors",
270
+ "model.layers.35.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
271
+ "model.layers.35.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
272
+ "model.layers.35.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
273
+ "model.layers.35.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
274
+ "model.layers.35.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
275
+ "model.layers.35.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
276
+ "model.layers.35.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
277
+ "model.layers.35.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
278
+ "model.layers.36.input_layernorm.weight": "model-00009-of-00015.safetensors",
279
+ "model.layers.36.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
280
+ "model.layers.36.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
281
+ "model.layers.36.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
282
+ "model.layers.36.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
283
+ "model.layers.36.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
284
+ "model.layers.36.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
285
+ "model.layers.36.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
286
+ "model.layers.36.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
287
+ "model.layers.37.input_layernorm.weight": "model-00009-of-00015.safetensors",
288
+ "model.layers.37.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
289
+ "model.layers.37.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
290
+ "model.layers.37.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
291
+ "model.layers.37.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
292
+ "model.layers.37.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
293
+ "model.layers.37.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
294
+ "model.layers.37.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
295
+ "model.layers.37.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
296
+ "model.layers.38.input_layernorm.weight": "model-00010-of-00015.safetensors",
297
+ "model.layers.38.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
298
+ "model.layers.38.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
299
+ "model.layers.38.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
300
+ "model.layers.38.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
301
+ "model.layers.38.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
302
+ "model.layers.38.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
303
+ "model.layers.38.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
304
+ "model.layers.38.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
305
+ "model.layers.39.input_layernorm.weight": "model-00010-of-00015.safetensors",
306
+ "model.layers.39.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
307
+ "model.layers.39.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
308
+ "model.layers.39.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
309
+ "model.layers.39.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
310
+ "model.layers.39.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
311
+ "model.layers.39.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
312
+ "model.layers.39.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
313
+ "model.layers.39.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
314
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00015.safetensors",
315
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
316
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
317
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
318
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
319
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
320
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
321
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
322
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
323
+ "model.layers.40.input_layernorm.weight": "model-00010-of-00015.safetensors",
324
+ "model.layers.40.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
325
+ "model.layers.40.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
326
+ "model.layers.40.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
327
+ "model.layers.40.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
328
+ "model.layers.40.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
329
+ "model.layers.40.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
330
+ "model.layers.40.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
331
+ "model.layers.40.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
332
+ "model.layers.41.input_layernorm.weight": "model-00010-of-00015.safetensors",
333
+ "model.layers.41.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
334
+ "model.layers.41.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
335
+ "model.layers.41.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
336
+ "model.layers.41.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
337
+ "model.layers.41.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
338
+ "model.layers.41.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
339
+ "model.layers.41.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
340
+ "model.layers.41.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
341
+ "model.layers.42.input_layernorm.weight": "model-00011-of-00015.safetensors",
342
+ "model.layers.42.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
343
+ "model.layers.42.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
344
+ "model.layers.42.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
345
+ "model.layers.42.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
346
+ "model.layers.42.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
347
+ "model.layers.42.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
348
+ "model.layers.42.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
349
+ "model.layers.42.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
350
+ "model.layers.43.input_layernorm.weight": "model-00011-of-00015.safetensors",
351
+ "model.layers.43.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
352
+ "model.layers.43.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
353
+ "model.layers.43.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
354
+ "model.layers.43.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
355
+ "model.layers.43.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
356
+ "model.layers.43.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
357
+ "model.layers.43.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
358
+ "model.layers.43.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
359
+ "model.layers.44.input_layernorm.weight": "model-00011-of-00015.safetensors",
360
+ "model.layers.44.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
361
+ "model.layers.44.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
362
+ "model.layers.44.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
363
+ "model.layers.44.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
364
+ "model.layers.44.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
365
+ "model.layers.44.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
366
+ "model.layers.44.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
367
+ "model.layers.44.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
368
+ "model.layers.45.input_layernorm.weight": "model-00011-of-00015.safetensors",
369
+ "model.layers.45.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
370
+ "model.layers.45.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
371
+ "model.layers.45.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
372
+ "model.layers.45.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
373
+ "model.layers.45.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
374
+ "model.layers.45.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
375
+ "model.layers.45.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
376
+ "model.layers.45.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
377
+ "model.layers.46.input_layernorm.weight": "model-00012-of-00015.safetensors",
378
+ "model.layers.46.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
379
+ "model.layers.46.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
380
+ "model.layers.46.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
381
+ "model.layers.46.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
382
+ "model.layers.46.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
383
+ "model.layers.46.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
384
+ "model.layers.46.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
385
+ "model.layers.46.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
386
+ "model.layers.47.input_layernorm.weight": "model-00012-of-00015.safetensors",
387
+ "model.layers.47.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
388
+ "model.layers.47.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
389
+ "model.layers.47.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
390
+ "model.layers.47.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
391
+ "model.layers.47.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
392
+ "model.layers.47.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
393
+ "model.layers.47.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
394
+ "model.layers.47.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
395
+ "model.layers.48.input_layernorm.weight": "model-00012-of-00015.safetensors",
396
+ "model.layers.48.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
397
+ "model.layers.48.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
398
+ "model.layers.48.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
399
+ "model.layers.48.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
400
+ "model.layers.48.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
401
+ "model.layers.48.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
402
+ "model.layers.48.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
403
+ "model.layers.48.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
404
+ "model.layers.49.input_layernorm.weight": "model-00012-of-00015.safetensors",
405
+ "model.layers.49.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
406
+ "model.layers.49.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
407
+ "model.layers.49.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
408
+ "model.layers.49.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
409
+ "model.layers.49.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
410
+ "model.layers.49.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
411
+ "model.layers.49.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
412
+ "model.layers.49.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
413
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00015.safetensors",
414
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
415
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
416
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
417
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
418
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
419
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
420
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
421
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
422
+ "model.layers.50.input_layernorm.weight": "model-00012-of-00015.safetensors",
423
+ "model.layers.50.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
424
+ "model.layers.50.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
425
+ "model.layers.50.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
426
+ "model.layers.50.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
427
+ "model.layers.50.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
428
+ "model.layers.50.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
429
+ "model.layers.50.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
430
+ "model.layers.50.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
431
+ "model.layers.51.input_layernorm.weight": "model-00013-of-00015.safetensors",
432
+ "model.layers.51.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
433
+ "model.layers.51.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
434
+ "model.layers.51.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
435
+ "model.layers.51.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
436
+ "model.layers.51.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
437
+ "model.layers.51.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
438
+ "model.layers.51.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
439
+ "model.layers.51.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
440
+ "model.layers.52.input_layernorm.weight": "model-00013-of-00015.safetensors",
441
+ "model.layers.52.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
442
+ "model.layers.52.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
443
+ "model.layers.52.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
444
+ "model.layers.52.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
445
+ "model.layers.52.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
446
+ "model.layers.52.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
447
+ "model.layers.52.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
448
+ "model.layers.52.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
449
+ "model.layers.53.input_layernorm.weight": "model-00013-of-00015.safetensors",
450
+ "model.layers.53.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
451
+ "model.layers.53.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
452
+ "model.layers.53.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
453
+ "model.layers.53.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
454
+ "model.layers.53.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
455
+ "model.layers.53.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
456
+ "model.layers.53.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
457
+ "model.layers.53.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
458
+ "model.layers.54.input_layernorm.weight": "model-00013-of-00015.safetensors",
459
+ "model.layers.54.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
460
+ "model.layers.54.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
461
+ "model.layers.54.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
462
+ "model.layers.54.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
463
+ "model.layers.54.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
464
+ "model.layers.54.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
465
+ "model.layers.54.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
466
+ "model.layers.54.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
467
+ "model.layers.55.input_layernorm.weight": "model-00014-of-00015.safetensors",
468
+ "model.layers.55.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
469
+ "model.layers.55.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
470
+ "model.layers.55.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
471
+ "model.layers.55.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
472
+ "model.layers.55.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
473
+ "model.layers.55.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
474
+ "model.layers.55.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
475
+ "model.layers.55.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
476
+ "model.layers.56.input_layernorm.weight": "model-00014-of-00015.safetensors",
477
+ "model.layers.56.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
478
+ "model.layers.56.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
479
+ "model.layers.56.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
480
+ "model.layers.56.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
481
+ "model.layers.56.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
482
+ "model.layers.56.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
483
+ "model.layers.56.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
484
+ "model.layers.56.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
485
+ "model.layers.57.input_layernorm.weight": "model-00014-of-00015.safetensors",
486
+ "model.layers.57.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
487
+ "model.layers.57.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
488
+ "model.layers.57.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
489
+ "model.layers.57.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
490
+ "model.layers.57.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
491
+ "model.layers.57.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
492
+ "model.layers.57.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
493
+ "model.layers.57.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
494
+ "model.layers.58.input_layernorm.weight": "model-00014-of-00015.safetensors",
495
+ "model.layers.58.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
496
+ "model.layers.58.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
497
+ "model.layers.58.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
498
+ "model.layers.58.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
499
+ "model.layers.58.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
500
+ "model.layers.58.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
501
+ "model.layers.58.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
502
+ "model.layers.58.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
503
+ "model.layers.59.input_layernorm.weight": "model-00015-of-00015.safetensors",
504
+ "model.layers.59.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
505
+ "model.layers.59.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
506
+ "model.layers.59.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
507
+ "model.layers.59.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
508
+ "model.layers.59.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
509
+ "model.layers.59.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
510
+ "model.layers.59.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
511
+ "model.layers.59.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
512
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00015.safetensors",
513
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
514
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
515
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
516
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
517
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
518
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
519
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
520
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
521
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00015.safetensors",
522
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
523
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
524
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
525
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
526
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
527
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
528
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
529
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
530
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00015.safetensors",
531
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
532
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
533
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
534
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
535
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
536
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
537
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
538
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
539
+ "model.layers.9.input_layernorm.weight": "model-00003-of-00015.safetensors",
540
+ "model.layers.9.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
541
+ "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
542
+ "model.layers.9.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
543
+ "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
544
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
545
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
546
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
547
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
548
+ "model.mm_projector.0.bias": "model-00015-of-00015.safetensors",
549
+ "model.mm_projector.0.weight": "model-00015-of-00015.safetensors",
550
+ "model.mm_projector.2.bias": "model-00015-of-00015.safetensors",
551
+ "model.mm_projector.2.weight": "model-00015-of-00015.safetensors",
552
+ "model.norm.weight": "model-00015-of-00015.safetensors",
553
+ "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.bias": "model-00015-of-00015.safetensors",
554
+ "model.vision_tower.vision_tower.vision_model.embeddings.patch_embedding.weight": "model-00015-of-00015.safetensors",
555
+ "model.vision_tower.vision_tower.vision_model.embeddings.position_embedding.weight": "model-00015-of-00015.safetensors",
556
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias": "model-00015-of-00015.safetensors",
557
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight": "model-00015-of-00015.safetensors",
558
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias": "model-00015-of-00015.safetensors",
559
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight": "model-00015-of-00015.safetensors",
560
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias": "model-00015-of-00015.safetensors",
561
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight": "model-00015-of-00015.safetensors",
562
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias": "model-00015-of-00015.safetensors",
563
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight": "model-00015-of-00015.safetensors",
564
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
565
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
566
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
567
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
568
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
569
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
570
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
571
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
572
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias": "model-00015-of-00015.safetensors",
573
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight": "model-00015-of-00015.safetensors",
574
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias": "model-00015-of-00015.safetensors",
575
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight": "model-00015-of-00015.safetensors",
576
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias": "model-00015-of-00015.safetensors",
577
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight": "model-00015-of-00015.safetensors",
578
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias": "model-00015-of-00015.safetensors",
579
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight": "model-00015-of-00015.safetensors",
580
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
581
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
582
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
583
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
584
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
585
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
586
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
587
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
588
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias": "model-00015-of-00015.safetensors",
589
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight": "model-00015-of-00015.safetensors",
590
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias": "model-00015-of-00015.safetensors",
591
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight": "model-00015-of-00015.safetensors",
592
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias": "model-00015-of-00015.safetensors",
593
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight": "model-00015-of-00015.safetensors",
594
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias": "model-00015-of-00015.safetensors",
595
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight": "model-00015-of-00015.safetensors",
596
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
597
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
598
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
599
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
600
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
601
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
602
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
603
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
604
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias": "model-00015-of-00015.safetensors",
605
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight": "model-00015-of-00015.safetensors",
606
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias": "model-00015-of-00015.safetensors",
607
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight": "model-00015-of-00015.safetensors",
608
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias": "model-00015-of-00015.safetensors",
609
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight": "model-00015-of-00015.safetensors",
610
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias": "model-00015-of-00015.safetensors",
611
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight": "model-00015-of-00015.safetensors",
612
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
613
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
614
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
615
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
616
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
617
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
618
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
619
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
620
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias": "model-00015-of-00015.safetensors",
621
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight": "model-00015-of-00015.safetensors",
622
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias": "model-00015-of-00015.safetensors",
623
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight": "model-00015-of-00015.safetensors",
624
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias": "model-00015-of-00015.safetensors",
625
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight": "model-00015-of-00015.safetensors",
626
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias": "model-00015-of-00015.safetensors",
627
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight": "model-00015-of-00015.safetensors",
628
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
629
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
630
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
631
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
632
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
633
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
634
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
635
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
636
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias": "model-00015-of-00015.safetensors",
637
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight": "model-00015-of-00015.safetensors",
638
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias": "model-00015-of-00015.safetensors",
639
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight": "model-00015-of-00015.safetensors",
640
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias": "model-00015-of-00015.safetensors",
641
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight": "model-00015-of-00015.safetensors",
642
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias": "model-00015-of-00015.safetensors",
643
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight": "model-00015-of-00015.safetensors",
644
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
645
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
646
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
647
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
648
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
649
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
650
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
651
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
652
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias": "model-00015-of-00015.safetensors",
653
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight": "model-00015-of-00015.safetensors",
654
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias": "model-00015-of-00015.safetensors",
655
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight": "model-00015-of-00015.safetensors",
656
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias": "model-00015-of-00015.safetensors",
657
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight": "model-00015-of-00015.safetensors",
658
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias": "model-00015-of-00015.safetensors",
659
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight": "model-00015-of-00015.safetensors",
660
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
661
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
662
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
663
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
664
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
665
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
666
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
667
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
668
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias": "model-00015-of-00015.safetensors",
669
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight": "model-00015-of-00015.safetensors",
670
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias": "model-00015-of-00015.safetensors",
671
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight": "model-00015-of-00015.safetensors",
672
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias": "model-00015-of-00015.safetensors",
673
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight": "model-00015-of-00015.safetensors",
674
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias": "model-00015-of-00015.safetensors",
675
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight": "model-00015-of-00015.safetensors",
676
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
677
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
678
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
679
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
680
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
681
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
682
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
683
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
684
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias": "model-00015-of-00015.safetensors",
685
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight": "model-00015-of-00015.safetensors",
686
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias": "model-00015-of-00015.safetensors",
687
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight": "model-00015-of-00015.safetensors",
688
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias": "model-00015-of-00015.safetensors",
689
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight": "model-00015-of-00015.safetensors",
690
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias": "model-00015-of-00015.safetensors",
691
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight": "model-00015-of-00015.safetensors",
692
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
693
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
694
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
695
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
696
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
697
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
698
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
699
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
700
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias": "model-00015-of-00015.safetensors",
701
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight": "model-00015-of-00015.safetensors",
702
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias": "model-00015-of-00015.safetensors",
703
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight": "model-00015-of-00015.safetensors",
704
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias": "model-00015-of-00015.safetensors",
705
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight": "model-00015-of-00015.safetensors",
706
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias": "model-00015-of-00015.safetensors",
707
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight": "model-00015-of-00015.safetensors",
708
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
709
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
710
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
711
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
712
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
713
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
714
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
715
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
716
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias": "model-00015-of-00015.safetensors",
717
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight": "model-00015-of-00015.safetensors",
718
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias": "model-00015-of-00015.safetensors",
719
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight": "model-00015-of-00015.safetensors",
720
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias": "model-00015-of-00015.safetensors",
721
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight": "model-00015-of-00015.safetensors",
722
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias": "model-00015-of-00015.safetensors",
723
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight": "model-00015-of-00015.safetensors",
724
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
725
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
726
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
727
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
728
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
729
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
730
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
731
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
732
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias": "model-00015-of-00015.safetensors",
733
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight": "model-00015-of-00015.safetensors",
734
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias": "model-00015-of-00015.safetensors",
735
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight": "model-00015-of-00015.safetensors",
736
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias": "model-00015-of-00015.safetensors",
737
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight": "model-00015-of-00015.safetensors",
738
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias": "model-00015-of-00015.safetensors",
739
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight": "model-00015-of-00015.safetensors",
740
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
741
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
742
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
743
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
744
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
745
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
746
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
747
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
748
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias": "model-00015-of-00015.safetensors",
749
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight": "model-00015-of-00015.safetensors",
750
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias": "model-00015-of-00015.safetensors",
751
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight": "model-00015-of-00015.safetensors",
752
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias": "model-00015-of-00015.safetensors",
753
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight": "model-00015-of-00015.safetensors",
754
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias": "model-00015-of-00015.safetensors",
755
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight": "model-00015-of-00015.safetensors",
756
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
757
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
758
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
759
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
760
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
761
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
762
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
763
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
764
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias": "model-00015-of-00015.safetensors",
765
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight": "model-00015-of-00015.safetensors",
766
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias": "model-00015-of-00015.safetensors",
767
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight": "model-00015-of-00015.safetensors",
768
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias": "model-00015-of-00015.safetensors",
769
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight": "model-00015-of-00015.safetensors",
770
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias": "model-00015-of-00015.safetensors",
771
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight": "model-00015-of-00015.safetensors",
772
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
773
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
774
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
775
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
776
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
777
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
778
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
779
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
780
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias": "model-00015-of-00015.safetensors",
781
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight": "model-00015-of-00015.safetensors",
782
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias": "model-00015-of-00015.safetensors",
783
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight": "model-00015-of-00015.safetensors",
784
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias": "model-00015-of-00015.safetensors",
785
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight": "model-00015-of-00015.safetensors",
786
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias": "model-00015-of-00015.safetensors",
787
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight": "model-00015-of-00015.safetensors",
788
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
789
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
790
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
791
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
792
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
793
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
794
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
795
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
796
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias": "model-00015-of-00015.safetensors",
797
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight": "model-00015-of-00015.safetensors",
798
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias": "model-00015-of-00015.safetensors",
799
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight": "model-00015-of-00015.safetensors",
800
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias": "model-00015-of-00015.safetensors",
801
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight": "model-00015-of-00015.safetensors",
802
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias": "model-00015-of-00015.safetensors",
803
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight": "model-00015-of-00015.safetensors",
804
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
805
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
806
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
807
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
808
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
809
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
810
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
811
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
812
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias": "model-00015-of-00015.safetensors",
813
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight": "model-00015-of-00015.safetensors",
814
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias": "model-00015-of-00015.safetensors",
815
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight": "model-00015-of-00015.safetensors",
816
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias": "model-00015-of-00015.safetensors",
817
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight": "model-00015-of-00015.safetensors",
818
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias": "model-00015-of-00015.safetensors",
819
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight": "model-00015-of-00015.safetensors",
820
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
821
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
822
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
823
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
824
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
825
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
826
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
827
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
828
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias": "model-00015-of-00015.safetensors",
829
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight": "model-00015-of-00015.safetensors",
830
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias": "model-00015-of-00015.safetensors",
831
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight": "model-00015-of-00015.safetensors",
832
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias": "model-00015-of-00015.safetensors",
833
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight": "model-00015-of-00015.safetensors",
834
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias": "model-00015-of-00015.safetensors",
835
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight": "model-00015-of-00015.safetensors",
836
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
837
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
838
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
839
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
840
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
841
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
842
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
843
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
844
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias": "model-00015-of-00015.safetensors",
845
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight": "model-00015-of-00015.safetensors",
846
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias": "model-00015-of-00015.safetensors",
847
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight": "model-00015-of-00015.safetensors",
848
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias": "model-00015-of-00015.safetensors",
849
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight": "model-00015-of-00015.safetensors",
850
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias": "model-00015-of-00015.safetensors",
851
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight": "model-00015-of-00015.safetensors",
852
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
853
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
854
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
855
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
856
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
857
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
858
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
859
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
860
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias": "model-00015-of-00015.safetensors",
861
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight": "model-00015-of-00015.safetensors",
862
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias": "model-00015-of-00015.safetensors",
863
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight": "model-00015-of-00015.safetensors",
864
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias": "model-00015-of-00015.safetensors",
865
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight": "model-00015-of-00015.safetensors",
866
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias": "model-00015-of-00015.safetensors",
867
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight": "model-00015-of-00015.safetensors",
868
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
869
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
870
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
871
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
872
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
873
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
874
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
875
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
876
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias": "model-00015-of-00015.safetensors",
877
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight": "model-00015-of-00015.safetensors",
878
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias": "model-00015-of-00015.safetensors",
879
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight": "model-00015-of-00015.safetensors",
880
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias": "model-00015-of-00015.safetensors",
881
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight": "model-00015-of-00015.safetensors",
882
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias": "model-00015-of-00015.safetensors",
883
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight": "model-00015-of-00015.safetensors",
884
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
885
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
886
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
887
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
888
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
889
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
890
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
891
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
892
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias": "model-00015-of-00015.safetensors",
893
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight": "model-00015-of-00015.safetensors",
894
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias": "model-00015-of-00015.safetensors",
895
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight": "model-00015-of-00015.safetensors",
896
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias": "model-00015-of-00015.safetensors",
897
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight": "model-00015-of-00015.safetensors",
898
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias": "model-00015-of-00015.safetensors",
899
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight": "model-00015-of-00015.safetensors",
900
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
901
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
902
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
903
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
904
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
905
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
906
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
907
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
908
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias": "model-00015-of-00015.safetensors",
909
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight": "model-00015-of-00015.safetensors",
910
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias": "model-00015-of-00015.safetensors",
911
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight": "model-00015-of-00015.safetensors",
912
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias": "model-00015-of-00015.safetensors",
913
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight": "model-00015-of-00015.safetensors",
914
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias": "model-00015-of-00015.safetensors",
915
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight": "model-00015-of-00015.safetensors",
916
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
917
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
918
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
919
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
920
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
921
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
922
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
923
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
924
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias": "model-00015-of-00015.safetensors",
925
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight": "model-00015-of-00015.safetensors",
926
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias": "model-00015-of-00015.safetensors",
927
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight": "model-00015-of-00015.safetensors",
928
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias": "model-00015-of-00015.safetensors",
929
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight": "model-00015-of-00015.safetensors",
930
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias": "model-00015-of-00015.safetensors",
931
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight": "model-00015-of-00015.safetensors",
932
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
933
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
934
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
935
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
936
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
937
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
938
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
939
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
940
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias": "model-00015-of-00015.safetensors",
941
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight": "model-00015-of-00015.safetensors",
942
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias": "model-00015-of-00015.safetensors",
943
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight": "model-00015-of-00015.safetensors",
944
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias": "model-00015-of-00015.safetensors",
945
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight": "model-00015-of-00015.safetensors",
946
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias": "model-00015-of-00015.safetensors",
947
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight": "model-00015-of-00015.safetensors",
948
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
949
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
950
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
951
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
952
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
953
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
954
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
955
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
956
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias": "model-00015-of-00015.safetensors",
957
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight": "model-00015-of-00015.safetensors",
958
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias": "model-00015-of-00015.safetensors",
959
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight": "model-00015-of-00015.safetensors",
960
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias": "model-00015-of-00015.safetensors",
961
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight": "model-00015-of-00015.safetensors",
962
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias": "model-00015-of-00015.safetensors",
963
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight": "model-00015-of-00015.safetensors",
964
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
965
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight": "model-00015-of-00015.safetensors",
966
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias": "model-00015-of-00015.safetensors",
967
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight": "model-00015-of-00015.safetensors",
968
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias": "model-00015-of-00015.safetensors",
969
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00015-of-00015.safetensors",
970
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00015-of-00015.safetensors",
971
+ "model.vision_tower.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00015-of-00015.safetensors",
972
+ "model.vision_tower.vision_tower.vision_model.post_layernorm.bias": "model-00015-of-00015.safetensors",
973
+ "model.vision_tower.vision_tower.vision_model.post_layernorm.weight": "model-00015-of-00015.safetensors"
974
+ }
975
+ }
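
The weight map ends here: every vision-tower parameter (the CLIP-style encoder layers, layer norms, and attention projections listed above) is stored in the final shard, model-00015-of-00015.safetensors. `from_pretrained` resolves this mapping automatically when loading, but the sketch below shows how the index can be inspected directly. It is a minimal sketch assuming the index uses the standard sharded-checkpoint file name `model.safetensors.index.json` and sits in the current directory.

```python
# Minimal sketch: read the sharded-checkpoint index and resolve a parameter
# name to its shard. Assumes the standard file name
# 'model.safetensors.index.json' in the current directory.
import json
from collections import defaultdict

with open('model.safetensors.index.json') as f:
    index = json.load(f)

weight_map = index['weight_map']  # parameter name -> shard file name

# One of the vision-tower parameters listed in the diff above.
name = 'model.vision_tower.vision_tower.vision_model.post_layernorm.weight'
print(name, '->', weight_map[name])  # model-00015-of-00015.safetensors

# Count how many parameters each shard holds.
per_shard = defaultdict(int)
for shard in weight_map.values():
    per_shard[shard] += 1
for shard, n in sorted(per_shard.items()):
    print(shard, n)
```
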
sample/4927.png ADDED
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
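
The file above pins the four special tokens the model relies on: `<|startoftext|>` as BOS, `<|im_end|>` as EOS, and `<unk>` doubling as both PAD and UNK token. A quick way to confirm the tokenizer picks these up is sketched below; loading from a local clone of the repository works the same way.

```python
# Minimal sketch: confirm the special tokens declared in
# special_tokens_map.json are what the loaded tokenizer reports.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    'NaughtyDog97/FormalEnhencedGPS-34B',
    trust_remote_code=True)

print(tokenizer.bos_token)  # <|startoftext|>
print(tokenizer.eos_token)  # <|im_end|>
print(tokenizer.pad_token)  # <unk>
print(tokenizer.unk_token)  # <unk>
```
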
tokenizer.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39
3
+ size 1033105
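
tokenizer.model is stored through Git LFS, so the diff only records the pointer: the LFS spec version, the SHA-256 of the actual file, and its size in bytes (1033105). If a clone fetched the pointer instead of the blob (for example, because git-lfs was not installed), the check below fails and `git lfs pull` is needed. This is a minimal sketch assuming the file sits in the current directory.

```python
# Minimal sketch: verify a downloaded tokenizer.model against the sha256 and
# size recorded in the Git LFS pointer above (assumes the file is in ./).
import hashlib
import os

path = 'tokenizer.model'
expected_sha256 = '386c49cf943d71aa110361135338c50e38beeff0a66593480421f37b319e1a39'
expected_size = 1033105

assert os.path.getsize(path) == expected_size, 'size mismatch: LFS pointer instead of the real file?'
with open(path, 'rb') as f:
    assert hashlib.sha256(f.read()).hexdigest() == expected_sha256, 'sha256 mismatch'
print('tokenizer.model matches the LFS pointer')
```
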
tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<|startoftext|>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "<|endoftext|>",
24
+ "lstrip": false,
25
+ "normalized": true,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "7": {
31
+ "content": "<|im_end|>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ }
38
+ },
39
+ "bos_token": "<|startoftext|>",
40
+ "chat_template": "{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\\n' + content + '<|im_end|>\\n<|im_start|>assistant\\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\\n' }}{% endif %}{% endfor %}",
41
+ "clean_up_tokenization_spaces": false,
42
+ "eos_token": "<|im_end|>",
43
+ "legacy": true,
44
+ "model_max_length": 4096,
45
+ "pad_token": "<unk>",
46
+ "padding_side": "right",
47
+ "sp_model_kwargs": {},
48
+ "spaces_between_special_tokens": false,
49
+ "split_special_tokens": false,
50
+ "tokenizer_class": "LlamaTokenizer",
51
+ "unk_token": "<unk>",
52
+ "use_default_system_prompt": false
53
+ }
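
The `chat_template` above is ChatML-style: each user turn is wrapped in `<|im_start|>user ... <|im_end|>` and immediately followed by an opened `<|im_start|>assistant` block, which matches the `<|im_end|>` EOS token configured earlier. Below is a minimal sketch of rendering a conversation with this template; any string works as the message content, and the comment shows the output shape the template above produces.

```python
# Minimal sketch: render a conversation with the chat_template defined in
# tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    'NaughtyDog97/FormalEnhencedGPS-34B',
    trust_remote_code=True)

messages = [{'role': 'user', 'content': 'Hello'}]
text = tokenizer.apply_chat_template(messages, tokenize=False)
print(text)
# Per the template above, this prints:
# <|im_start|>user
# Hello<|im_end|>
# <|im_start|>assistant
```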