sathvikparasa20 committed on
Commit
5c62fee
1 Parent(s): 1b7de6b

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -39
app.py DELETED
@@ -1,39 +0,0 @@
1
- from transformers import ViltProcessor, ViltForQuestionAnswering
2
- import torch
3
- import gradio as gr
4
-
5
- # Load the model and processor
6
- processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
7
- model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
8
-
9
- def answer_question(image, text):
10
- # Convert the uploaded image to PIL format
11
- image = Image.fromarray(image.astype('uint8'), 'RGB')
12
-
13
- # Process the image and text
14
- encoding = processor(images=image, text=text, return_tensors="pt", padding=True)
15
-
16
- # Forward pass
17
- with torch.no_grad():
18
- outputs = model(**encoding)
19
-
20
- logits = outputs.logits
21
- idx = logits.argmax(-1).item()
22
- predicted_answer = model.config.id2label[idx]
23
-
24
- # Return the predicted answer
25
- return predicted_answer
26
-
27
- # Define Gradio inputs and outputs
28
- image = gr.Image(type="numpy", label="Upload Image")
29
- question = gr.Textbox(lines=2, label="Question")
30
- answer = gr.Textbox(label="Predicted Answer")
31
-
32
- # Create Gradio Interface
33
- gr.Interface(
34
- fn=answer_question,
35
- inputs=[image, question],
36
- outputs=answer,
37
- title="Image Based Visual Question Answering",
38
- description="This is a demonstration of ViLT (Vision and Language Transformer) using Gradio, which has been fine-tuned on VQAv2 to answer questions based on images. To get a predicted answer, please provide an image and type in your question, then press the submit button."
39
- ).launch(share=True)