Image-Text-to-Text
Transformers
Safetensors
English
idefics2
pretraining
multimodal
vision
Inference Endpoints
5 papers
amyeroberts HF staff committed on
Commit
1cb5729
1 Parent(s): aaddfc9

Upload folder using huggingface_hub

Browse files
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42c7811c3d0085534c037964fa972ecbaa3a274b27f8fceee62b9d14e07a42b9
3
- size 4644107280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88850fcdb9b975d89c2b20068743b20aaaa2eebef90eff37a5f27324c49f169d
3
+ size 4644107632
model.safetensors.index.json CHANGED
@@ -4,41 +4,41 @@
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00007-of-00007.safetensors",
7
- "model.modality_projection.down_proj.weight": "model-00001-of-00007.safetensors",
8
- "model.modality_projection.gate_proj.weight": "model-00001-of-00007.safetensors",
9
- "model.modality_projection.up_proj.weight": "model-00001-of-00007.safetensors",
10
- "model.perceiver_resampler.latents": "model-00001-of-00007.safetensors",
11
- "model.perceiver_resampler.layers.0.input_context_norm.weight": "model-00001-of-00007.safetensors",
12
- "model.perceiver_resampler.layers.0.input_latents_norm.weight": "model-00001-of-00007.safetensors",
13
- "model.perceiver_resampler.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
14
- "model.perceiver_resampler.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
15
- "model.perceiver_resampler.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
16
- "model.perceiver_resampler.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
17
- "model.perceiver_resampler.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
18
- "model.perceiver_resampler.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
19
- "model.perceiver_resampler.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
20
- "model.perceiver_resampler.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
21
- "model.perceiver_resampler.layers.1.input_context_norm.weight": "model-00001-of-00007.safetensors",
22
- "model.perceiver_resampler.layers.1.input_latents_norm.weight": "model-00001-of-00007.safetensors",
23
- "model.perceiver_resampler.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
24
- "model.perceiver_resampler.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
25
- "model.perceiver_resampler.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
26
- "model.perceiver_resampler.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
27
- "model.perceiver_resampler.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
28
- "model.perceiver_resampler.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
29
- "model.perceiver_resampler.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
30
- "model.perceiver_resampler.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
31
- "model.perceiver_resampler.layers.2.input_context_norm.weight": "model-00001-of-00007.safetensors",
32
- "model.perceiver_resampler.layers.2.input_latents_norm.weight": "model-00001-of-00007.safetensors",
33
- "model.perceiver_resampler.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
34
- "model.perceiver_resampler.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
35
- "model.perceiver_resampler.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
36
- "model.perceiver_resampler.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
37
- "model.perceiver_resampler.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
38
- "model.perceiver_resampler.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
39
- "model.perceiver_resampler.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
40
- "model.perceiver_resampler.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
41
- "model.perceiver_resampler.norm.weight": "model-00001-of-00007.safetensors",
42
  "model.text_model.embed_tokens.weight": "model-00002-of-00007.safetensors",
43
  "model.text_model.layers.0.input_layernorm.weight": "model-00002-of-00007.safetensors",
44
  "model.text_model.layers.0.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
 
4
  },
5
  "weight_map": {
6
  "lm_head.weight": "model-00007-of-00007.safetensors",
7
+ "model.connector.modality_projection.down_proj.weight": "model-00001-of-00007.safetensors",
8
+ "model.connector.modality_projection.gate_proj.weight": "model-00001-of-00007.safetensors",
9
+ "model.connector.modality_projection.up_proj.weight": "model-00001-of-00007.safetensors",
10
+ "model.connector.perceiver_resampler.latents": "model-00001-of-00007.safetensors",
11
+ "model.connector.perceiver_resampler.layers.0.input_context_norm.weight": "model-00001-of-00007.safetensors",
12
+ "model.connector.perceiver_resampler.layers.0.input_latents_norm.weight": "model-00001-of-00007.safetensors",
13
+ "model.connector.perceiver_resampler.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
14
+ "model.connector.perceiver_resampler.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
15
+ "model.connector.perceiver_resampler.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
16
+ "model.connector.perceiver_resampler.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
17
+ "model.connector.perceiver_resampler.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
18
+ "model.connector.perceiver_resampler.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
19
+ "model.connector.perceiver_resampler.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
20
+ "model.connector.perceiver_resampler.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
21
+ "model.connector.perceiver_resampler.layers.1.input_context_norm.weight": "model-00001-of-00007.safetensors",
22
+ "model.connector.perceiver_resampler.layers.1.input_latents_norm.weight": "model-00001-of-00007.safetensors",
23
+ "model.connector.perceiver_resampler.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
24
+ "model.connector.perceiver_resampler.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
25
+ "model.connector.perceiver_resampler.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
26
+ "model.connector.perceiver_resampler.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
27
+ "model.connector.perceiver_resampler.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
28
+ "model.connector.perceiver_resampler.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
29
+ "model.connector.perceiver_resampler.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
30
+ "model.connector.perceiver_resampler.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
31
+ "model.connector.perceiver_resampler.layers.2.input_context_norm.weight": "model-00001-of-00007.safetensors",
32
+ "model.connector.perceiver_resampler.layers.2.input_latents_norm.weight": "model-00001-of-00007.safetensors",
33
+ "model.connector.perceiver_resampler.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
34
+ "model.connector.perceiver_resampler.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
35
+ "model.connector.perceiver_resampler.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
36
+ "model.connector.perceiver_resampler.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
37
+ "model.connector.perceiver_resampler.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
38
+ "model.connector.perceiver_resampler.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
39
+ "model.connector.perceiver_resampler.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
40
+ "model.connector.perceiver_resampler.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
41
+ "model.connector.perceiver_resampler.norm.weight": "model-00001-of-00007.safetensors",
42
  "model.text_model.embed_tokens.weight": "model-00002-of-00007.safetensors",
43
  "model.text_model.layers.0.input_layernorm.weight": "model-00002-of-00007.safetensors",
44
  "model.text_model.layers.0.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
special_tokens_map.json CHANGED
@@ -13,6 +13,13 @@
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
 
 
 
 
 
 
 
16
  }
17
  ],
18
  "bos_token": {
 
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
+ },
17
+ {
18
+ "content": "<end_of_utterance>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
  }
24
  ],
25
  "bos_token": {
tokenizer.json CHANGED
@@ -47,6 +47,15 @@
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": {
 
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
+ },
51
+ {
52
+ "id": 32002,
53
+ "content": "<end_of_utterance>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
  }
60
  ],
61
  "normalizer": {
tokenizer_config.json CHANGED
@@ -41,11 +41,20 @@
41
  "rstrip": false,
42
  "single_word": false,
43
  "special": true
 
 
 
 
 
 
 
 
44
  }
45
  },
46
  "additional_special_tokens": [
47
  "<fake_token_around_image>",
48
- "<image>"
 
49
  ],
50
  "bos_token": "<s>",
51
  "clean_up_tokenization_spaces": false,
 
41
  "rstrip": false,
42
  "single_word": false,
43
  "special": true
44
+ },
45
+ "32002": {
46
+ "content": "<end_of_utterance>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
  }
53
  },
54
  "additional_special_tokens": [
55
  "<fake_token_around_image>",
56
+ "<image>",
57
+ "<end_of_utterance>"
58
  ],
59
  "bos_token": "<s>",
60
  "clean_up_tokenization_spaces": false,