<!-- Image Captioning settings panel: caption source, multimodal API/model, prompt and message-template options. -->
<div class="caption_settings"> | |
<div class="inline-drawer"> | |
<!-- Drawer header: panel title plus a chevron icon. The chevron is decorative, so it is hidden from assistive technology. -->
<div class="inline-drawer-toggle inline-drawer-header">
    <b data-i18n="Image Captioning">Image Captioning</b>
    <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down" aria-hidden="true"></div>
</div>
<div class="inline-drawer-content"> | |
<!-- Caption source selector. Each option's data-i18n key mirrors its visible English text. -->
<label for="caption_source" data-i18n="Source">Source</label>
<select id="caption_source" class="text_pole">
    <option value="local" data-i18n="Local">Local</option>
    <option value="multimodal" data-i18n="Multimodal (OpenAI / Anthropic / llama / Google)">Multimodal (OpenAI / Anthropic / llama / Google)</option>
    <!-- Key includes the "(deprecated)" suffix so a translation lookup cannot silently drop the deprecation notice. -->
    <option value="extras" data-i18n="Extras (deprecated)">Extras (deprecated)</option>
    <option value="horde" data-i18n="Horde">Horde</option>
</select>
<div id="caption_multimodal_block" class="flex-container wide100p"> | |
<!-- Multimodal API selector. Option values here match the data-type attributes used on the model options and on the provider-specific blocks in this panel. -->
<div class="flex1 flex-container flexFlowColumn flexNoGap">
    <label for="caption_multimodal_api" data-i18n="API">API</label>
    <select id="caption_multimodal_api" class="flex1 text_pole">
        <option value="zerooneai">01.AI (Yi)</option>
        <option value="aimlapi">AI/ML API</option>
        <option value="anthropic">Anthropic</option>
        <option value="cohere">Cohere</option>
        <option value="custom" data-i18n="Custom (OpenAI-compatible)">Custom (OpenAI-compatible)</option>
        <option value="google">Google AI Studio</option>
        <option value="vertexai">Google Vertex AI</option>
        <option value="groq">Groq</option>
        <option value="koboldcpp">KoboldCpp</option>
        <option value="llamacpp">llama.cpp</option>
        <option value="mistral">MistralAI</option>
        <option value="ollama">Ollama</option>
        <option value="openai">OpenAI</option>
        <option value="openrouter">OpenRouter</option>
        <option value="ooba" data-i18n="Text Generation WebUI (oobabooga)">Text Generation WebUI (oobabooga)</option>
        <option value="pollinations">Pollinations</option>
        <option value="vllm">vLLM</option>
        <option value="xai">xAI (Grok)</option>
    </select>
</div>
<!-- Multimodal model selector. Every option carries a data-type equal to one of the #caption_multimodal_api values; presumably the panel script filters this list by the selected API (the script is not visible here - confirm). -->
<div class="flex1 flex-container flexFlowColumn flexNoGap">
    <label for="caption_multimodal_model" data-i18n="Model">Model</label>
    <select id="caption_multimodal_model" class="flex1 text_pole">
        <option data-type="cohere" value="c4ai-aya-vision-8b">c4ai-aya-vision-8b</option>
        <option data-type="cohere" value="c4ai-aya-vision-32b">c4ai-aya-vision-32b</option>
        <option data-type="mistral" value="pixtral-12b-latest">pixtral-12b-latest</option>
        <option data-type="mistral" value="pixtral-12b-2409">pixtral-12b-2409</option>
        <option data-type="mistral" value="pixtral-large-latest">pixtral-large-latest</option>
        <option data-type="mistral" value="pixtral-large-2411">pixtral-large-2411</option>
        <option data-type="mistral" value="mistral-large-pixtral-2411">mistral-large-pixtral-2411</option>
        <option data-type="mistral" value="mistral-small-2503">mistral-small-2503</option>
        <option data-type="mistral" value="mistral-small-latest">mistral-small-latest</option>
        <option data-type="mistral" value="mistral-medium-latest">mistral-medium-latest</option>
        <option data-type="mistral" value="mistral-medium-2505">mistral-medium-2505</option>
        <option data-type="zerooneai" value="yi-vision">yi-vision</option>
        <option data-type="openai" value="gpt-4.1">gpt-4.1</option>
        <option data-type="openai" value="gpt-4.1-2025-04-14">gpt-4.1-2025-04-14</option>
        <option data-type="openai" value="gpt-4.1-mini">gpt-4.1-mini</option>
        <option data-type="openai" value="gpt-4.1-mini-2025-04-14">gpt-4.1-mini-2025-04-14</option>
        <option data-type="openai" value="gpt-4.1-nano">gpt-4.1-nano</option>
        <option data-type="openai" value="gpt-4.1-nano-2025-04-14">gpt-4.1-nano-2025-04-14</option>
        <option data-type="openai" value="gpt-4-vision-preview">gpt-4-vision-preview</option>
        <option data-type="openai" value="gpt-4-turbo">gpt-4-turbo</option>
        <option data-type="openai" value="gpt-4o">gpt-4o</option>
        <option data-type="openai" value="gpt-4o-mini">gpt-4o-mini</option>
        <option data-type="openai" value="gpt-4o-mini-2024-07-18">gpt-4o-mini-2024-07-18</option>
        <option data-type="openai" value="chatgpt-4o-latest">chatgpt-4o-latest</option>
        <option data-type="openai" value="o1">o1</option>
        <option data-type="openai" value="o1-2024-12-17">o1-2024-12-17</option>
        <option data-type="openai" value="o3">o3</option>
        <option data-type="openai" value="o3-2025-04-16">o3-2025-04-16</option>
        <option data-type="openai" value="o4-mini">o4-mini</option>
        <option data-type="openai" value="o4-mini-2025-04-16">o4-mini-2025-04-16</option>
        <option data-type="openai" value="gpt-4.5-preview">gpt-4.5-preview</option>
        <option data-type="openai" value="gpt-4.5-preview-2025-02-27">gpt-4.5-preview-2025-02-27</option>
        <option data-type="anthropic" value="claude-opus-4-0">claude-opus-4-0</option>
        <option data-type="anthropic" value="claude-opus-4-20250514">claude-opus-4-20250514</option>
        <option data-type="anthropic" value="claude-sonnet-4-0">claude-sonnet-4-0</option>
        <option data-type="anthropic" value="claude-sonnet-4-20250514">claude-sonnet-4-20250514</option>
        <option data-type="anthropic" value="claude-3-7-sonnet-latest">claude-3-7-sonnet-latest</option>
        <option data-type="anthropic" value="claude-3-7-sonnet-20250219">claude-3-7-sonnet-20250219</option>
        <option data-type="anthropic" value="claude-3-5-sonnet-latest">claude-3-5-sonnet-latest</option>
        <option data-type="anthropic" value="claude-3-5-sonnet-20241022">claude-3-5-sonnet-20241022</option>
        <option data-type="anthropic" value="claude-3-5-sonnet-20240620">claude-3-5-sonnet-20240620</option>
        <option data-type="anthropic" value="claude-3-5-haiku-latest">claude-3-5-haiku-latest</option>
        <option data-type="anthropic" value="claude-3-5-haiku-20241022">claude-3-5-haiku-20241022</option>
        <option data-type="anthropic" value="claude-3-opus-20240229">claude-3-opus-20240229</option>
        <option data-type="anthropic" value="claude-3-sonnet-20240229">claude-3-sonnet-20240229</option>
        <option data-type="anthropic" value="claude-3-haiku-20240307">claude-3-haiku-20240307</option>
        <option data-type="google" value="gemini-2.5-pro">gemini-2.5-pro</option>
        <option data-type="google" value="gemini-2.5-pro-preview-06-05">gemini-2.5-pro-preview-06-05</option>
        <option data-type="google" value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
        <option data-type="google" value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
        <option data-type="google" value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
        <option data-type="google" value="gemini-2.5-flash">gemini-2.5-flash</option>
        <option data-type="google" value="gemini-2.5-flash-preview-05-20">gemini-2.5-flash-preview-05-20</option>
        <option data-type="google" value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
        <option data-type="google" value="gemini-2.5-flash-lite-preview-06-17">gemini-2.5-flash-lite-preview-06-17</option>
        <option data-type="google" value="gemini-2.0-pro-exp-02-05">gemini-2.0-pro-exp-02-05 → 2.5-pro-exp-03-25</option>
        <option data-type="google" value="gemini-2.0-pro-exp">gemini-2.0-pro-exp → 2.5-pro-exp-03-25</option>
        <option data-type="google" value="gemini-exp-1206">gemini-exp-1206 → 2.5-pro-exp-03-25</option>
        <option data-type="google" value="gemini-2.0-flash-001">gemini-2.0-flash-001</option>
        <option data-type="google" value="gemini-2.0-flash-exp-image-generation">gemini-2.0-flash-exp-image-generation</option>
        <option data-type="google" value="gemini-2.0-flash-exp">gemini-2.0-flash-exp</option>
        <option data-type="google" value="gemini-2.0-flash">gemini-2.0-flash</option>
        <option data-type="google" value="gemini-2.0-flash-thinking-exp-01-21">gemini-2.0-flash-thinking-exp-01-21 → 2.5-flash-preview-04-17</option>
        <option data-type="google" value="gemini-2.0-flash-thinking-exp-1219">gemini-2.0-flash-thinking-exp-1219 → 2.5-flash-preview-04-17</option>
        <option data-type="google" value="gemini-2.0-flash-thinking-exp">gemini-2.0-flash-thinking-exp → 2.5-flash-preview-04-17</option>
        <option data-type="google" value="gemini-2.0-flash-lite-001">gemini-2.0-flash-lite-001</option>
        <option data-type="google" value="gemini-2.0-flash-lite-preview-02-05">gemini-2.0-flash-lite-preview-02-05</option>
        <option data-type="google" value="gemini-2.0-flash-lite-preview">gemini-2.0-flash-lite-preview</option>
        <option data-type="google" value="gemini-1.5-pro-latest">gemini-1.5-pro-latest</option>
        <option data-type="google" value="gemini-1.5-pro-002">gemini-1.5-pro-002</option>
        <option data-type="google" value="gemini-1.5-pro-001">gemini-1.5-pro-001</option>
        <option data-type="google" value="gemini-1.5-pro">gemini-1.5-pro</option>
        <option data-type="google" value="gemini-1.5-flash-latest">gemini-1.5-flash-latest</option>
        <option data-type="google" value="gemini-1.5-flash-002">gemini-1.5-flash-002</option>
        <option data-type="google" value="gemini-1.5-flash-001">gemini-1.5-flash-001</option>
        <option data-type="google" value="gemini-1.5-flash">gemini-1.5-flash</option>
        <option data-type="google" value="gemini-1.5-flash-8b-001">gemini-1.5-flash-8b-001</option>
        <option data-type="google" value="gemini-1.5-flash-8b-exp-0924">gemini-1.5-flash-8b-exp-0924</option>
        <option data-type="google" value="gemini-1.5-flash-8b-exp-0827">gemini-1.5-flash-8b-exp-0827</option>
        <option data-type="google" value="learnlm-2.0-flash-experimental">learnlm-2.0-flash-experimental</option>
        <option data-type="google" value="learnlm-1.5-pro-experimental">learnlm-1.5-pro-experimental</option>
        <option data-type="vertexai" value="gemini-2.5-pro">gemini-2.5-pro</option>
        <option data-type="vertexai" value="gemini-2.5-pro-preview-06-05">gemini-2.5-pro-preview-06-05</option>
        <option data-type="vertexai" value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
        <option data-type="vertexai" value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
        <option data-type="vertexai" value="gemini-2.5-flash">gemini-2.5-flash</option>
        <option data-type="vertexai" value="gemini-2.5-flash-preview-05-20">gemini-2.5-flash-preview-05-20</option>
        <option data-type="vertexai" value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
        <option data-type="vertexai" value="gemini-2.5-flash-lite-preview-06-17">gemini-2.5-flash-lite-preview-06-17</option>
        <option data-type="vertexai" value="gemini-2.0-flash-001">gemini-2.0-flash-001</option>
        <option data-type="vertexai" value="gemini-2.0-flash-lite-001">gemini-2.0-flash-lite-001</option>
        <option data-type="groq" value="llama-3.2-11b-vision-preview">llama-3.2-11b-vision-preview</option>
        <option data-type="groq" value="llama-3.2-90b-vision-preview">llama-3.2-90b-vision-preview</option>
        <option data-type="groq" value="llava-v1.5-7b-4096-preview">llava-v1.5-7b-4096-preview</option>
        <option data-type="aimlapi" value="gpt-4o-2024-05-13">gpt-4o-2024-05-13</option>
        <option data-type="aimlapi" value="gpt-4o-2024-08-06">gpt-4o-2024-08-06</option>
        <option data-type="aimlapi" value="gpt-4o">gpt-4o</option>
        <option data-type="aimlapi" value="gpt-4o-mini">gpt-4o-mini</option>
        <option data-type="aimlapi" value="gpt-4o-mini-2024-07-18">gpt-4o-mini-2024-07-18</option>
        <option data-type="aimlapi" value="chatgpt-4o-latest">chatgpt-4o-latest</option>
        <option data-type="aimlapi" value="gpt-4-turbo">gpt-4-turbo</option>
        <option data-type="aimlapi" value="claude-3-opus-20240229">claude-3-opus-20240229</option>
        <option data-type="aimlapi" value="claude-3-sonnet-20240229">claude-3-sonnet-20240229</option>
        <option data-type="aimlapi" value="claude-3-haiku-20240307">claude-3-haiku-20240307</option>
        <option data-type="aimlapi" value="claude-3-5-sonnet-20240620">claude-3-5-sonnet-20240620</option>
        <option data-type="aimlapi" value="google/gemini-1.5-flash">google/gemini-1.5-flash</option>
        <option data-type="aimlapi" value="google/gemini-1.5-pro">google/gemini-1.5-pro</option>
        <option data-type="aimlapi" value="google/gemini-2.0-flash">google/gemini-2.0-flash</option>
        <option data-type="aimlapi" value="google/gemini-2.0-flash-exp">google/gemini-2.0-flash-exp</option>
        <option data-type="aimlapi" value="google/gemini-2.5-flash-preview">google/gemini-2.5-flash-preview</option>
        <option data-type="aimlapi" value="meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo">meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo</option>
        <option data-type="aimlapi" value="meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo">meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo</option>
        <option data-type="openrouter" value="openai/gpt-4-vision-preview">openai/gpt-4-vision-preview</option>
        <option data-type="openrouter" value="openai/gpt-4o">openai/gpt-4o</option>
        <option data-type="openrouter" value="openai/gpt-4o-2024-05-13">openai/gpt-4o-2024-05-13</option>
        <option data-type="openrouter" value="openai/gpt-4o-2024-08-06">openai/gpt-4o-2024-08-06</option>
        <option data-type="openrouter" value="openai/gpt-4-turbo">openai/gpt-4-turbo</option>
        <option data-type="openrouter" value="openai/gpt-4o-mini">openai/gpt-4o-mini</option>
        <option data-type="openrouter" value="openai/gpt-4o-mini-2024-07-18">openai/gpt-4o-mini-2024-07-18</option>
        <option data-type="openrouter" value="openai/chatgpt-4o-latest">openai/chatgpt-4o-latest</option>
        <option data-type="openrouter" value="haotian-liu/llava-13b">haotian-liu/llava-13b</option>
        <option data-type="openrouter" value="fireworks/firellava-13b">fireworks/firellava-13b</option>
        <option data-type="openrouter" value="anthropic/claude-3.5-sonnet">anthropic/claude-3.5-sonnet</option>
        <option data-type="openrouter" value="anthropic/claude-3-haiku">anthropic/claude-3-haiku</option>
        <option data-type="openrouter" value="anthropic/claude-3-sonnet">anthropic/claude-3-sonnet</option>
        <option data-type="openrouter" value="anthropic/claude-3-opus">anthropic/claude-3-opus</option>
        <option data-type="openrouter" value="anthropic/claude-3.5-sonnet:beta">anthropic/claude-3.5-sonnet:beta</option>
        <option data-type="openrouter" value="anthropic/claude-3-haiku:beta">anthropic/claude-3-haiku:beta</option>
        <option data-type="openrouter" value="anthropic/claude-3-sonnet:beta">anthropic/claude-3-sonnet:beta</option>
        <option data-type="openrouter" value="anthropic/claude-3-opus:beta">anthropic/claude-3-opus:beta</option>
        <option data-type="openrouter" value="nousresearch/nous-hermes-2-vision-7b">nousresearch/nous-hermes-2-vision-7b</option>
        <option data-type="openrouter" value="google/gemini-flash-8b-1.5-exp">google/gemini-flash-8b-1.5-exp</option>
        <option data-type="openrouter" value="google/gemini-flash-1.5">google/gemini-flash-1.5</option>
        <option data-type="openrouter" value="google/gemini-flash-1.5-exp">google/gemini-flash-1.5-exp</option>
        <option data-type="openrouter" value="google/gemini-pro-1.5">google/gemini-pro-1.5</option>
        <option data-type="openrouter" value="google/gemini-pro-1.5-exp">google/gemini-pro-1.5-exp</option>
        <option data-type="openrouter" value="google/gemini-pro-vision">google/gemini-pro-vision</option>
        <option data-type="openrouter" value="liuhaotian/llava-yi-34b">liuhaotian/llava-yi-34b</option>
        <option data-type="ollama" value="ollama_current" data-i18n="currently_selected">[Currently selected]</option>
        <option data-type="ollama" value="bakllava">bakllava</option>
        <option data-type="ollama" value="llava">llava</option>
        <option data-type="ollama" value="llava-llama3">llava-llama3</option>
        <option data-type="ollama" value="llava-phi3">llava-phi3</option>
        <option data-type="ollama" value="moondream">moondream</option>
        <option data-type="llamacpp" value="llamacpp_current" data-i18n="currently_loaded">[Currently loaded]</option>
        <option data-type="ooba" value="ooba_current" data-i18n="currently_loaded">[Currently loaded]</option>
        <option data-type="koboldcpp" value="koboldcpp_current" data-i18n="currently_loaded">[Currently loaded]</option>
        <option data-type="vllm" value="vllm_current" data-i18n="currently_selected">[Currently selected]</option>
        <option data-type="custom" value="custom_current" data-i18n="currently_selected">[Currently selected]</option>
        <option data-type="xai" value="grok-2-vision-1212">grok-2-vision-1212</option>
        <option data-type="xai" value="grok-vision-beta">grok-vision-beta</option>
        <option data-type="pollinations" value="openai">openai</option>
        <option data-type="pollinations" value="openai-fast">openai-fast</option>
        <option data-type="pollinations" value="openai-large">openai-large</option>
        <option data-type="pollinations" value="openai-roblox">openai-roblox</option>
        <option data-type="pollinations" value="mistral">mistral</option>
        <option data-type="pollinations" value="unity">unity</option>
        <option data-type="pollinations" value="mirexa">mirexa</option>
        <option data-type="pollinations" value="searchgpt">searchgpt</option>
        <option data-type="pollinations" value="evil">evil</option>
        <option data-type="pollinations" value="phi">phi</option>
        <option data-type="pollinations" value="sur">sur</option>
        <option data-type="pollinations" value="bidara">bidara</option>
    </select>
</div>
<!-- Ollama-only notice (data-type="ollama") reminding the user to download the model before captioning. -->
<!-- NOTE(review): the link uses href="#" as an action trigger; a <button type="button"> would be the semantic choice, but the handler bound to #caption_ollama_pull and its styling are not visible here, so the markup is kept as is. -->
<div data-type="ollama">
    The model must be downloaded first! Do it with the <code>ollama pull</code> command or <a href="#" id="caption_ollama_pull">click here</a>.
</div>
<!-- Reverse-proxy opt-in. The data-type list names the API values this control applies to. -->
<label data-type="openai,anthropic,google,vertexai,mistral,xai" class="checkbox_label flexBasis100p" for="caption_allow_reverse_proxy" title="Allow using reverse proxy if defined and valid.">
    <input id="caption_allow_reverse_proxy" type="checkbox" class="checkbox">
    <span data-i18n="Allow reverse proxy">Allow reverse proxy</span>
</label>
<!-- Static hint pointing the user to the API Connections tab for keys and endpoints. -->
<div class="flexBasis100p m-b-1">
    <small><b data-i18n="Hint:">Hint:</b> <span data-i18n="Set your API keys and endpoints in the 'API Connections' tab first.">Set your API keys and endpoints in the 'API Connections' tab first.</span></small>
</div>
<!-- Secondary captioning endpoint: an enable checkbox plus the URL field. The data-type list names the API values this block applies to. -->
<div data-type="koboldcpp,ollama,vllm,llamacpp,ooba" class="flex-container flexFlowColumn">
    <label for="caption_altEndpoint_enabled" class="checkbox_label">
        <input id="caption_altEndpoint_enabled" type="checkbox">
        <span data-i18n="Use secondary URL">Use secondary URL</span>
    </label>
    <label for="caption_altEndpoint_url" data-i18n="Secondary captioning endpoint URL">Secondary captioning endpoint URL</label>
    <!-- input is a void element, so no self-closing slash. -->
    <input id="caption_altEndpoint_url" class="text_pole" type="text" placeholder="e.g. http://localhost:5001">
</div>
</div> | |
<!-- Caption prompt: the textarea is pre-filled from the PROMPT_DEFAULT template value; the checkbox asks for a custom prompt on every caption (per its title). -->
<div id="caption_prompt_block">
    <label for="caption_prompt" data-i18n="Caption Prompt">Caption Prompt</label>
    <!-- Angle brackets in the placeholder are written as entities; the resulting attribute value is unchanged. -->
    <textarea id="caption_prompt" class="text_pole" rows="1" placeholder="&lt; Use default &gt;">{{PROMPT_DEFAULT}}</textarea>
    <label class="checkbox_label margin-bot-10px" for="caption_prompt_ask" title="Ask for a custom prompt every time an image is captioned.">
        <input id="caption_prompt_ask" type="checkbox" class="checkbox">
        <span data-i18n="Ask every time">Ask every time</span>
    </label>
</div>
<!-- Message template field, pre-filled from the TEMPLATE_DEFAULT template value. -->
<!-- The macro name shown in the hint is written with brace entities: this file uses double-brace placeholders for template substitution, so literal double braces around "caption" would be consumed by the template engine instead of being displayed. -->
<label for="caption_template"><span data-i18n="Message Template">Message Template</span> <small><span data-i18n="(use _space">(use </span> <code>&lcub;&lcub;caption&rcub;&rcub;</code> <span data-i18n="macro)">macro)</span></small></label>
<textarea id="caption_template" class="text_pole" rows="2" placeholder="&lt; Use default &gt;">{{TEMPLATE_DEFAULT}}</textarea>
<!-- Auto-caption toggle with an info icon. -->
<!-- NOTE(review): the explanation lives only in the icon's title attribute, which is not reachable by keyboard; consider exposing it as visible or described-by text. -->
<label class="checkbox_label" for="caption_auto_mode">
    <input id="caption_auto_mode" type="checkbox" class="checkbox">
    <span data-i18n="Automatically caption images">Automatically caption images</span>
    <i class="fa-solid fa-info-circle" title="Automatically caption images when they are pasted into the chat or attached to messages."></i>
</label>
<!-- Refine-mode toggle: lets the user edit a caption before it is saved (per its label). -->
<label class="checkbox_label margin-bot-10px" for="caption_refine_mode">
    <input id="caption_refine_mode" type="checkbox" class="checkbox">
    <span data-i18n="Edit captions before saving">Edit captions before saving</span>
</label>
</div> | |
</div> | |
</div> | |