xxx1 committed on
Commit e777852
1 Parent(s): f6234b1

Update app.py

Files changed (1)
  app.py +18 -4
app.py CHANGED
@@ -1,8 +1,24 @@
 import string
 import gradio as gr
 import requests
+import torch
+
+
+from transformers import BlipForQuestionAnswering, BlipProcessor
+
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
+model_vqa = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)
 def inference_chat(input_image,input_text):
-    return input_text
+    inputs = processor(images=input_image, text=input_text,return_tensors="pt")
+
+
+    inputs["max_length"] = 20
+    inputs["num_beams"] = 5
+
+    out = model_vqa.generate(**inputs)
+    return processor.batch_decode(out, skip_special_tokens=True)[0]
 
 with gr.Blocks(
     css="""
@@ -17,8 +33,6 @@ with gr.Blocks(
     #gr.Markdown(article)
 
     with gr.Row():
-        with gr.Column():
-            caption_output = gr.Textbox(lines=1, label="VQA Output")
         with gr.Column(scale=1):
             image_input = gr.Image(type="pil")
 
@@ -34,7 +48,7 @@ with gr.Blocks(
             ],
            [ caption_output],
         )
-
+    caption_output = gr.Textbox(lines=1, label="VQA Output")
     with gr.Row():
        clear_button = gr.Button(value="Clear", interactive=True)
        clear_button.click(
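
For reference, the change routes inference_chat through a BLIP VQA model. Below is a minimal standalone sketch of that inference path under the same checkpoint and decoding settings; the answer_question wrapper name, the example image URL, and the question string are illustrative assumptions, and, unlike the committed code, the processor output is moved onto the model's device before generate() is called.

import requests
import torch
from PIL import Image
from transformers import BlipForQuestionAnswering, BlipProcessor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Same checkpoint as the commit: BLIP base fine-tuned for visual question answering.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model_vqa = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)

def answer_question(image, question):
    # Preprocess the image/question pair and move the tensors to the model's device.
    inputs = processor(images=image, text=question, return_tensors="pt").to(device)
    # Decoding settings taken from the commit: max_length=20, num_beams=5.
    out = model_vqa.generate(**inputs, max_length=20, num_beams=5)
    return processor.batch_decode(out, skip_special_tokens=True)[0]

# Hypothetical example image; any RGB PIL image works.
img = Image.open(requests.get("https://example.com/demo.jpg", stream=True).raw).convert("RGB")
print(answer_question(img, "What is in the picture?"))

In the Gradio app, inference_chat plays the role of answer_question: Blocks passes the PIL image from image_input and the question text to the callback, and the decoded string is written to the caption_output textbox.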