tstvqa / vqa.py
hfmrbean's picture
Upload 2 files
7a24794
raw
history blame
880 Bytes
import gradio as gr
from transformers import ViltProcessor, ViltForQuestionAnswering
# Hugging Face checkpoint used for both the processor and the model.
_VQA_CHECKPOINT = "dandelin/vilt-b32-finetuned-vqa"

# Lazily populated cache holding a single ("pair" -> (processor, model)) entry,
# so the checkpoint is loaded once per process instead of on every request.
_vqa_components = {}


def _load_vqa_components():
    """Return the cached ``(processor, model)`` pair, loading it on first use."""
    try:
        return _vqa_components["pair"]
    except KeyError:
        processor = ViltProcessor.from_pretrained(_VQA_CHECKPOINT)
        model = ViltForQuestionAnswering.from_pretrained(_VQA_CHECKPOINT)
        # Store both objects in one assignment, and only after both loads
        # succeeded, so a failed or half-finished load is never observed.
        _vqa_components["pair"] = (processor, model)
        return _vqa_components["pair"]


def getResult(query, image):
    """Answer a natural-language question about an image with ViLT.

    Args:
        query: The question text.
        image: The image to ask about. The Gradio interface in this file
            is configured with ``type="pil"``, so it passes a PIL image.

    Returns:
        The entry of ``model.config.id2label`` whose logit is highest.
    """
    # Local import: torch is not among this file's top-level imports.
    import torch

    processor, model = _load_vqa_components()

    # Prepare the combined image + question inputs as PyTorch tensors.
    encoding = processor(image, query, return_tensors="pt")

    # Inference only, so gradient tracking is not needed for the forward pass.
    with torch.no_grad():
        outputs = model(**encoding)

    idx = outputs.logits.argmax(-1).item()
    answer = model.config.id2label[idx]
    print("Predicted answer:", answer)
    return answer
# Web UI: a free-text question plus an uploaded image (delivered to the
# callback as a PIL image), with the predicted answer shown as text.
_image_input = gr.Image(type="pil")
iface = gr.Interface(
    fn=getResult,
    inputs=["text", _image_input],
    outputs="text",
)

# Bind to all network interfaces and request a public share link.
iface.launch(
    server_name="0.0.0.0",
    share=True,
)