JuanMa360 committed on
Commit
8f35db0
1 Parent(s): 2d16554

refactor: app

Browse files
Files changed (1) hide show
  1. app.py +21 -18
app.py CHANGED
@@ -1,25 +1,28 @@
 
1
  import gradio as gr
2
- import spaces
3
- import torch
4
- from transformers import AutoProcessor, LlavaForConditionalGeneration
5
 
6
- model_id = "llava-hf/llava-1.5-7b-hf"
 
7
 
8
- prompt_format = "USER: <image>\n{}\nASSISTANT:"
9
 
10
- model = LlavaForConditionalGeneration.from_pretrained(
11
- model_id,
12
- torch_dtype=torch.float16,
13
- low_cpu_mem_usage=True,
14
- ).cuda()
15
 
16
- processor = AutoProcessor.from_pretrained(model_id)
 
17
 
18
- @spaces.GPU
19
- def inference(text, image):
20
- prompt = prompt_format.format(text)
21
- inputs = processor(prompt, image, return_tensors='pt').to(0, torch.float16)
22
- output = model.generate(**inputs, max_new_tokens=1024)
23
- return processor.decode(output[0], skip_special_tokens=True).split("ASSISTANT:")[-1]
 
24
 
25
- gr.Interface(fn=inference, inputs=[gr.Text(), gr.Image()], outputs=gr.Text()).launch()
 
1
+ from PIL import Image
2
  import gradio as gr
3
+ import requests
4
+ from transformers import AutoProcessor, BlipForQuestionAnswering
 
5
 
6
# Hugging Face model id, defined once so the model and its processor
# are guaranteed to stay in sync.
MODEL_ID = "Salesforce/blip-vqa-base"

# BLIP visual-question-answering model and the matching preprocessor
# (handles both image transforms and question tokenization).
model = BlipForQuestionAnswering.from_pretrained(MODEL_ID)
processor = AutoProcessor.from_pretrained(MODEL_ID)
8
 
9
def generate_answer(text, image):
    """Answer a free-form question about an image with the BLIP VQA model.

    Args:
        text: The question to ask about the image.
        image: The input image (gradio supplies a PIL image via ``type="pil"``).

    Returns:
        The model's decoded answer as a plain string.
    """
    # NOTE(review): the original had a no-op `text = text` here — removed.
    # Preprocess the (image, question) pair into model-ready tensors.
    inputs = processor(images=image, text=text, return_tensors="pt")
    outputs = model.generate(**inputs)
    # Drop special tokens so only the human-readable answer remains.
    return processor.decode(outputs[0], skip_special_tokens=True)
16
 
17
# Input widgets: a multi-line question box and a PIL image uploader.
question_widget = gr.Textbox(lines=5, label="Enter text")
picture_widget = gr.Image(type="pil", label="Upload Image")

# Wire the widgets to the VQA function and serve the demo.
iface = gr.Interface(
    fn=generate_answer,
    inputs=[question_widget, picture_widget],
    outputs="text",
    title="DD360-Bot-Multimodal",
    description="Enter text and upload an image",
)

iface.launch()