Image-Text-to-Text
Transformers
Safetensors
English
idefics2
pretraining
multimodal
vision
Inference Endpoints
File size: 483 Bytes
4444407
 
 
 
 
1
2
3
4
5
6
{
  "chat_template": "{% for message in messages %}{{message['role'].capitalize()}}{% if message['content'][0]['type'] == 'image' %}{{':'}}{% else %}{{': '}}{% endif %}{% for line in message['content'] %}{% if line['type'] == 'text' %}{{line['text']}}{% elif line['type'] == 'image' %}{{ '<image>' }}{% endif %}{% endfor %}<end_of_utterance>\n{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
  "image_seq_len": 64,
  "processor_class": "Idefics2Processor"
}