import torch
import gradio as gr

from promptcap import PromptCap

# Load the PromptCap model, which captions an image conditioned on a question.
model = PromptCap("tifa-benchmark/promptcap-coco-vqa")

# Run inference on the GPU when one is available.
if torch.cuda.is_available():
    model.cuda()


def generate_caption(image, question):
    """Generate a question-aware caption for the given image file."""
    prompt = f"please describe this image according to the given question: {question}"
    caption = model.caption(prompt, image)
    return caption
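
# Optional sanity check outside Gradio. The image path below is a placeholder
# assumption, not part of the original script; point it at any local image
# file before uncommenting.
# print(generate_caption("example.jpg", "What is the person holding?"))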

# Wire the captioner into a simple Gradio UI: an image upload plus a question box.
interface = gr.Interface(
    fn=generate_caption,
    inputs=[
        gr.Image(type="filepath", label="Input Image"),
        gr.Textbox(label="Question"),
    ],
    outputs=gr.Textbox(label="Generated Caption"),
)

interface.launch()