Upload KawnIdefics3ForConditionalGeneration

Changed files:
- config.json +40 -34
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +2 -2
- model.safetensors.index.json +9 -20
config.json CHANGED

@@ -1,72 +1,76 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "kawn_cohere_8b_idefics3_siglib14_384",
   "architectures": [
     "KawnIdefics3ForConditionalGeneration"
   ],
   "ignore_index": -100,
   "image_grid_pinpoints": [
     [
-
-
+      364,
+      364
     ],
     [
-
-
+      364,
+      728
     ],
     [
-
-
+      364,
+      1092
     ],
     [
-
-
+      1092,
+      364
     ],
     [
-
-
+      728,
+      364
     ],
     [
-
-
+      728,
+      728
     ],
     [
-
-
+      728,
+      1092
     ],
     [
-
-
+      1092,
+      728
     ],
     [
-
-
+      1092,
+      1092
     ],
     [
-
-
+      364,
+      1456
     ],
     [
-
-
+      1456,
+      364
     ],
     [
-
-
+      728,
+      1456
     ],
     [
-
-
+      1456,
+      728
     ],
     [
-
-
+      1456,
+      1092
     ],
     [
-
-
+      1092,
+      1456
+    ],
+    [
+      1456,
+      1456
     ]
   ],
-  "image_seq_length":
+  "image_seq_length": 169,
   "image_token_id": 255030,
   "model_type": "kawn_idefics3",
   "scale_factor": 2,
@@ -91,12 +95,14 @@
   "vision_config": {
     "_attn_implementation_autoset": true,
     "hidden_size": 1152,
-    "image_size":
+    "image_size": 364,
+    "initializer_range": 0.02,
     "intermediate_size": 4304,
     "model_type": "siglip_vision_model",
     "num_attention_heads": 16,
     "num_hidden_layers": 27,
-    "patch_size": 14
+    "patch_size": 14,
+    "vision_use_head": false
   },
   "vision_feature_layer": null
 }
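The updated vision settings are internally consistent: with an Idefics3-style pixel-shuffle connector, each 364x364 tile yields 364/14 = 26 patches per side, the scale_factor of 2 halves each spatial dimension to 13, and 13 x 13 gives the image_seq_length of 169, while every image_grid_pinpoints entry is a multiple of the 364-pixel tile size. A minimal sanity-check sketch of that arithmetic (the local config.json path is only an example):

import json

# Sketch: check that image_seq_length matches the vision settings in the
# updated config.json (Idefics3-style pixel shuffle halves each spatial dim).
with open("config.json") as f:  # local path is an assumption
    cfg = json.load(f)

patches_per_side = cfg["vision_config"]["image_size"] // cfg["vision_config"]["patch_size"]  # 364 // 14 = 26
tokens_per_side = patches_per_side // cfg["scale_factor"]                                    # 26 // 2 = 13
assert tokens_per_side ** 2 == cfg["image_seq_length"]                                       # 13 * 13 = 169

# Every grid pinpoint should be a multiple of the 364-pixel tile size.
assert all(dim % cfg["vision_config"]["image_size"] == 0
           for pair in cfg["image_grid_pinpoints"] for dim in pair)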
model-00001-of-00004.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d3667ce67891748a4c70bda73a78180f350af45928edb155e09a8a170f42dd4a
+size 4986700208
model-00002-of-00004.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d9ba99cd16ad5fe805e4f2df166fd37701fa7ea041a43854e6995db50c23d21e
+size 4999720960
model-00003-of-00004.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7f49966c2aa4946b0e08c029ef5228663d6468cbdd6550376856294a32842acf
+size 4915826080
model-00004-of-00004.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:beeb85936900fcf16af24c88c3eea1d9686ab50561922c949097a08d48891049
+size 2017514168
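Each safetensors shard is tracked with Git LFS, so the committed file is only a pointer recording the blob's sha256 and byte size; the actual weights are resolved from LFS storage. A hedged sketch for checking downloaded shards against the pointers above (shards assumed to sit in the current directory):

import hashlib

# Sketch: verify that downloaded shards match the sha256/size recorded in
# the LFS pointer files of this commit. Values copied from the diffs above.
EXPECTED = {
    "model-00001-of-00004.safetensors": ("d3667ce67891748a4c70bda73a78180f350af45928edb155e09a8a170f42dd4a", 4986700208),
    "model-00002-of-00004.safetensors": ("d9ba99cd16ad5fe805e4f2df166fd37701fa7ea041a43854e6995db50c23d21e", 4999720960),
    "model-00003-of-00004.safetensors": ("7f49966c2aa4946b0e08c029ef5228663d6468cbdd6550376856294a32842acf", 4915826080),
    "model-00004-of-00004.safetensors": ("beeb85936900fcf16af24c88c3eea1d9686ab50561922c949097a08d48891049", 2017514168),
}

for name, (oid, size) in EXPECTED.items():
    sha = hashlib.sha256()
    nbytes = 0
    with open(name, "rb") as f:  # assumes shards are in the working directory
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            nbytes += len(chunk)
    assert (sha.hexdigest(), nbytes) == (oid, size), f"mismatch for {name}"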
model.safetensors.index.json CHANGED

@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 16919667168
   },
   "weight_map": {
     "connector.proj.weight": "model-00004-of-00004.safetensors",
@@ -61,8 +61,8 @@
     "language_model.model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
-    "language_model.model.layers.15.input_layernorm.weight": "model-
-    "language_model.model.layers.15.mlp.down_proj.weight": "model-
+    "language_model.model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
@@ -73,10 +73,10 @@
     "language_model.model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
-    "language_model.model.layers.16.self_attn.k_proj.weight": "model-
-    "language_model.model.layers.16.self_attn.o_proj.weight": "model-
-    "language_model.model.layers.16.self_attn.q_proj.weight": "model-
-    "language_model.model.layers.16.self_attn.v_proj.weight": "model-
+    "language_model.model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "language_model.model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
@@ -167,7 +167,7 @@
     "language_model.model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
     "language_model.model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
-    "language_model.model.layers.27.mlp.gate_proj.weight": "model-
+    "language_model.model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.27.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
     "language_model.model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
     "language_model.model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
@@ -216,7 +216,7 @@
     "language_model.model.layers.4.input_layernorm.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.4.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
     "language_model.model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
-    "language_model.model.layers.4.mlp.up_proj.weight": "model-
+    "language_model.model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
     "language_model.model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
     "language_model.model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
     "language_model.model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
@@ -697,17 +697,6 @@
     "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
     "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
     "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.attention.in_proj_bias": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.attention.in_proj_weight": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.attention.out_proj.bias": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.attention.out_proj.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.layernorm.bias": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.layernorm.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.mlp.fc1.bias": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.mlp.fc1.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.mlp.fc2.bias": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.mlp.fc2.weight": "model-00001-of-00004.safetensors",
-    "vision_tower.vision_model.head.probe": "model-00001-of-00004.safetensors",
     "vision_tower.vision_model.post_layernorm.bias": "model-00001-of-00004.safetensors",
     "vision_tower.vision_model.post_layernorm.weight": "model-00001-of-00004.safetensors"
 }
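The index changes mirror the config: the SigLIP pooling head tensors (vision_tower.vision_model.head.*) are dropped from the weight map, consistent with "vision_use_head": false, the declared total_size is updated, and a handful of language-model tensor entries are rewritten. A small sketch, assuming the standard safetensors index layout, for inspecting the new mapping (the local path is an example):

import json
from collections import Counter

# Sketch: summarize the updated model.safetensors.index.json -- tensors per
# shard file, plus a check that no vision head weights remain in the map.
with open("model.safetensors.index.json") as f:  # local path is an assumption
    index = json.load(f)

weight_map = index["weight_map"]
print("declared total_size:", index["metadata"]["total_size"])  # 16919667168
print(Counter(weight_map.values()))                             # tensor count per shard

assert not any(k.startswith("vision_tower.vision_model.head.") for k in weight_map)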