File size: 858 Bytes
7a24794
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6b5aed7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import gradio as gr

from transformers import ViltProcessor, ViltForQuestionAnswering


def getResult(query, image):
    # prepare image + question
    #image = Image.open(BytesIO(base64.b64decode(base64_encoded_image)))
    text = query

    processor = ViltProcessor.from_pretrained(
        "dandelin/vilt-b32-finetuned-vqa")
    model = ViltForQuestionAnswering.from_pretrained(
        "dandelin/vilt-b32-finetuned-vqa")

    # prepare inputs
    encoding = processor(image, text, return_tensors="pt")

    # forward pass
    outputs = model(**encoding)
    logits = outputs.logits
    idx = logits.argmax(-1).item()
    print("Predicted answer:", model.config.id2label[idx])
    return model.config.id2label[idx]


iface = gr.Interface(fn=getResult, inputs=[
                     "text", gr.Image(type="pil")], outputs="text")
iface.launch(share=True)