RaushanTurganbay (HF staff) committed
Commit 0120fdc
1 parent: cbe57d6

Upload LlavaOnevisionForConditionalGeneration

README.md CHANGED
@@ -2,15 +2,15 @@
 language:
 - en
 - zh
-pipeline_tag: image-text-to-text
-inference: false
-arxiv: 2408.03326
 license: apache-2.0
 tags:
 - vision
 - image-text-to-text
 datasets:
 - lmms-lab/LLaVA-OneVision-Data
+pipeline_tag: image-text-to-text
+inference: false
+arxiv: 2408.03326
 ---
 # LLaVA-Onevision Model Card
config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "_name_or_path": "/raid/raushan/si-7b",
   "architectures": [
-    "LlavaNextForConditionalGeneration"
+    "LlavaOnevisionForConditionalGeneration"
   ],
   "ignore_index": -100,
   "image_grid_pinpoints": [
@@ -151,7 +151,7 @@
     ]
   ],
   "image_token_index": 151646,
-  "model_type": "llava_next",
+  "model_type": "llava_onevision",
   "projector_hidden_act": "gelu",
   "text_config": {
     "_name_or_path": "Qwen/Qwen2-7B-Instruct",
@@ -162,29 +162,24 @@
     "eos_token_id": 151645,
     "hidden_size": 3584,
     "intermediate_size": 18944,
-    "max_position_embeddings": 32768,
-    "max_window_layers": 28,
     "model_type": "qwen2",
     "num_attention_heads": 28,
     "num_hidden_layers": 28,
     "num_key_value_heads": 4,
     "rope_theta": 1000000.0,
-    "sliding_window": null,
     "torch_dtype": "bfloat16",
-    "use_sliding_window": false,
     "vocab_size": 152128
   },
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
   "transformers_version": "4.45.0.dev0",
   "use_image_newline_parameter": true,
+  "video_token_index": 151647,
   "vision_aspect_ratio": "anyres_max_9",
   "vision_config": {
-    "hidden_act": "gelu_pytorch_tanh",
     "hidden_size": 1152,
     "image_size": 384,
     "intermediate_size": 4304,
-    "layer_norm_eps": 1e-06,
     "model_type": "siglip_vision_model",
     "num_attention_heads": 16,
     "num_hidden_layers": 26,
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d57730bab78c99ac80380558a932a70a646de5de9da5bf285cc38a97c53b8ad
+oid sha256:70b24c7c6a41076e26abbbff0f21ada8fa91f39ea9b79ff9b2fefb0c0321c890
 size 4911200360
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:407a356b29c7cbcb99c0a2952150fb1de322bb3cc688a6d42fdb0fc350596fa5
+oid sha256:d677a5896cb79d1a24927efd4f1b8eebdacce03943a38c1daca7bc3213091d75
 size 1226266240
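The safetensors changes only swap the Git LFS `oid` for shards 1 and 4; the byte sizes are unchanged, so the weights were re-serialized rather than resized. A short sketch (the local file path is an assumption; point it at your downloaded shard) that checks a shard against the new pointer hash, since the LFS oid is simply the SHA-256 of the file contents:

```python
import hashlib

# Assumed local path to the downloaded shard from this commit.
path = "model-00001-of-00004.safetensors"
expected = "70b24c7c6a41076e26abbbff0f21ada8fa91f39ea9b79ff9b2fefb0c0321c890"

# Hash the file in 1 MiB chunks to avoid loading ~5 GB into memory at once.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == expected, "shard does not match the LFS pointer in this commit"
print("OK:", h.hexdigest())
```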