Commit
•
0120fdc
1
Parent(s):
cbe57d6
Upload LlavaOnevisionForConditionalGeneration
Browse files
- README.md +3 -3
- config.json +3 -8
- model-00001-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
README.md
CHANGED
@@ -2,15 +2,15 @@
|
|
2 |
language:
|
3 |
- en
|
4 |
- zh
|
5 |
-
pipeline_tag: image-text-to-text
|
6 |
-
inference: false
|
7 |
-
arxiv: 2408.03326
|
8 |
license: apache-2.0
|
9 |
tags:
|
10 |
- vision
|
11 |
- image-text-to-text
|
12 |
datasets:
|
13 |
- lmms-lab/LLaVA-OneVision-Data
|
|
|
|
|
|
|
14 |
---
|
15 |
# LLaVA-Onevision Model Card
|
16 |
|
|
|
2 |
language:
|
3 |
- en
|
4 |
- zh
|
|
|
|
|
|
|
5 |
license: apache-2.0
|
6 |
tags:
|
7 |
- vision
|
8 |
- image-text-to-text
|
9 |
datasets:
|
10 |
- lmms-lab/LLaVA-OneVision-Data
|
11 |
+
pipeline_tag: image-text-to-text
|
12 |
+
inference: false
|
13 |
+
arxiv: 2408.03326
|
14 |
---
|
15 |
# LLaVA-Onevision Model Card
|
16 |
|
config.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"_name_or_path": "/raid/raushan/si-7b",
|
3 |
"architectures": [
|
4 |
-
"
|
5 |
],
|
6 |
"ignore_index": -100,
|
7 |
"image_grid_pinpoints": [
|
@@ -151,7 +151,7 @@
|
|
151 |
]
|
152 |
],
|
153 |
"image_token_index": 151646,
|
154 |
-
"model_type": "
|
155 |
"projector_hidden_act": "gelu",
|
156 |
"text_config": {
|
157 |
"_name_or_path": "Qwen/Qwen2-7B-Instruct",
|
@@ -162,29 +162,24 @@
|
|
162 |
"eos_token_id": 151645,
|
163 |
"hidden_size": 3584,
|
164 |
"intermediate_size": 18944,
|
165 |
-
"max_position_embeddings": 32768,
|
166 |
-
"max_window_layers": 28,
|
167 |
"model_type": "qwen2",
|
168 |
"num_attention_heads": 28,
|
169 |
"num_hidden_layers": 28,
|
170 |
"num_key_value_heads": 4,
|
171 |
"rope_theta": 1000000.0,
|
172 |
-
"sliding_window": null,
|
173 |
"torch_dtype": "bfloat16",
|
174 |
-
"use_sliding_window": false,
|
175 |
"vocab_size": 152128
|
176 |
},
|
177 |
"tie_word_embeddings": false,
|
178 |
"torch_dtype": "float16",
|
179 |
"transformers_version": "4.45.0.dev0",
|
180 |
"use_image_newline_parameter": true,
|
|
|
181 |
"vision_aspect_ratio": "anyres_max_9",
|
182 |
"vision_config": {
|
183 |
-
"hidden_act": "gelu_pytorch_tanh",
|
184 |
"hidden_size": 1152,
|
185 |
"image_size": 384,
|
186 |
"intermediate_size": 4304,
|
187 |
-
"layer_norm_eps": 1e-06,
|
188 |
"model_type": "siglip_vision_model",
|
189 |
"num_attention_heads": 16,
|
190 |
"num_hidden_layers": 26,
|
|
|
1 |
{
|
2 |
"_name_or_path": "/raid/raushan/si-7b",
|
3 |
"architectures": [
|
4 |
+
"LlavaOnevisionForConditionalGeneration"
|
5 |
],
|
6 |
"ignore_index": -100,
|
7 |
"image_grid_pinpoints": [
|
|
|
151 |
]
|
152 |
],
|
153 |
"image_token_index": 151646,
|
154 |
+
"model_type": "llava_onevision",
|
155 |
"projector_hidden_act": "gelu",
|
156 |
"text_config": {
|
157 |
"_name_or_path": "Qwen/Qwen2-7B-Instruct",
|
|
|
162 |
"eos_token_id": 151645,
|
163 |
"hidden_size": 3584,
|
164 |
"intermediate_size": 18944,
|
|
|
|
|
165 |
"model_type": "qwen2",
|
166 |
"num_attention_heads": 28,
|
167 |
"num_hidden_layers": 28,
|
168 |
"num_key_value_heads": 4,
|
169 |
"rope_theta": 1000000.0,
|
|
|
170 |
"torch_dtype": "bfloat16",
|
|
|
171 |
"vocab_size": 152128
|
172 |
},
|
173 |
"tie_word_embeddings": false,
|
174 |
"torch_dtype": "float16",
|
175 |
"transformers_version": "4.45.0.dev0",
|
176 |
"use_image_newline_parameter": true,
|
177 |
+
"video_token_index": 151647,
|
178 |
"vision_aspect_ratio": "anyres_max_9",
|
179 |
"vision_config": {
|
|
|
180 |
"hidden_size": 1152,
|
181 |
"image_size": 384,
|
182 |
"intermediate_size": 4304,
|
|
|
183 |
"model_type": "siglip_vision_model",
|
184 |
"num_attention_heads": 16,
|
185 |
"num_hidden_layers": 26,
|
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4911200360
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70b24c7c6a41076e26abbbff0f21ada8fa91f39ea9b79ff9b2fefb0c0321c890
|
3 |
size 4911200360
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1226266240
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d677a5896cb79d1a24927efd4f1b8eebdacce03943a38c1daca7bc3213091d75
|
3 |
size 1226266240
|