xxx1 committed
Commit eb65419
1 Parent(s): adb7139

Create app.py

Files changed (1)
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
+ import gradio as gr
+ import torch
+
+ from transformers import BlipForQuestionAnswering, BlipProcessor
+
+ # Load the BLIP VQA checkpoint and processor once at startup.
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-large")
+ model_vqa = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-large").to(device)
+
+
+ def inference_chat(input_image, input_text):
+     # Preprocess the image/question pair and move the tensors to the model's device.
+     inputs = processor(images=input_image, text=input_text, return_tensors="pt").to(device)
+     # Answer with beam search, capped at 20 tokens.
+     out = model_vqa.generate(**inputs, max_length=20, num_beams=5)
+     return processor.batch_decode(out, skip_special_tokens=True)[0]
+
+
+ with gr.Blocks(
+     css="""
+     .message.svelte-w6rprc.svelte-w6rprc.svelte-w6rprc {font-size: 20px; margin-top: 20px}
+     #component-21 > div.wrap.svelte-w6rprc {height: 600px;}
+     """
+ ) as iface:
+     state = gr.State([])
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             image_input = gr.Image(type="pil")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             chat_input = gr.Textbox(lines=1, label="VQA Input")
+             # Create the output box before wiring events so it can be used as a target.
+             caption_output = gr.Textbox(lines=1, label="VQA Output")
+             chat_input.submit(
+                 inference_chat,
+                 [image_input, chat_input],
+                 [caption_output],
+             )
+
+             with gr.Row():
+                 clear_button = gr.Button(value="Clear", interactive=True)
+                 clear_button.click(
+                     lambda: ("", []),
+                     [],
+                     [chat_input, state],
+                     queue=False,
+                 )
+
+                 submit_button = gr.Button(
+                     value="Submit", interactive=True, variant="primary"
+                 )
+                 submit_button.click(
+                     inference_chat,
+                     [image_input, chat_input],
+                     [caption_output],
+                 )
+
+     # Reset the answer and the conversation state whenever a new image is loaded.
+     image_input.change(
+         lambda: ("", []),
+         [],
+         [caption_output, state],
+         queue=False,
+     )
+
+ # Gradio 3.x-era queue settings; queue() makes the extra launch flag unnecessary.
+ iface.queue(concurrency_count=1, api_open=False, max_size=10)
+ iface.launch()
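
As a quick sanity check outside the UI, the handler can be exercised directly in the same interpreter. A minimal sketch, assuming inference_chat from app.py is in scope and that the demo image URL below (a placeholder assumption, not part of the commit) is reachable:

    import requests
    from io import BytesIO
    from PIL import Image

    # Placeholder demo image; any local image opened with PIL works too.
    url = "https://raw.githubusercontent.com/salesforce/BLIP/main/demo.jpg"
    image = Image.open(BytesIO(requests.get(url, timeout=30).content)).convert("RGB")

    # Bypass the Gradio front end and call the handler directly.
    print(inference_chat(image, "what is in the picture?"))

This is the same call that both the Submit button and the VQA Input textbox are wired to, so it exercises the full preprocess / generate / decode path. Note that the queue signature used above (concurrency_count, api_open) matches the Gradio 3.x API, so a Gradio 3.x install is assumed throughout.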