improprogrammer committed on
Commit
73bea7b
·
verified ·
1 Parent(s): c90ebe6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ from transformers import BlipProcessor, BlipForConditionalGeneration, BlipForQuestionAnswering
4
+ import torch
5
+
6
+
7
@st.cache_resource
def load_models():
    """Load the BLIP processor and models once per Streamlit session.

    Cached with st.cache_resource so the (large) model weights are not
    re-downloaded and re-instantiated on every widget interaction / rerun.

    Returns:
        tuple: (processor, caption_model, vqa_model)
    """
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    # Bug fix: the original loaded the captioning model from the VQA
    # checkpoint ("Salesforce/blip-vqa-base"); the dedicated image-captioning
    # checkpoint is the correct one for caption generation.
    caption_model = BlipForConditionalGeneration.from_pretrained(
        "Salesforce/blip-image-captioning-base"
    )
    vqa_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
    return processor, caption_model, vqa_model


processor, caption_model, vqa_model = load_models()
13
+
14
# Page header plus a short one-line app description.
st.title("Visual Question Answering with BLIP")
st.markdown("This is a multi-language model made by Muhammad Yousaf.")
17
+
18
def generate_caption(image):
    """Generate a caption for a PIL image with the BLIP captioning model.

    Args:
        image: PIL.Image.Image to summarize.

    Returns:
        str: the decoded caption text.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Bug fix: the original moved only the inputs to `device`, leaving the
    # model on CPU — on a CUDA machine generate() raised a device-mismatch
    # error. Move the model to the same device as the inputs.
    model = caption_model.to(device)
    inputs = processor(images=image, return_tensors="pt").to(device)
    with torch.no_grad():
        generated_ids = model.generate(**inputs)
    caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return caption
25
+
26
+
27
uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
#( st.file_uploader) doc link : https://discuss.streamlit.io/t/adding-in-streamlit-chat-input-the-possibility-to-add-an-image/55156/4

if uploaded_file is not None:
    #(st.image)doc link: https://docs.streamlit.io/develop/api-reference/media/st.image
    image = Image.open(uploaded_file)
    st.image(image, caption='Uploaded Image.', use_column_width=True)

    with st.spinner('Summarizing the image...'):
        #(st.spinner)doc link: https://docs.streamlit.io/develop/api-reference/status/st.spinner
        caption = generate_caption(image)
    st.write(f"**Summary:** {caption}")

    # The question widget and VQA inference live inside this branch because
    # they need `image`, which only exists once a file has been uploaded.
    question = st.text_input("Ask a question about the image:")

    if question:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Bug fix: the original moved only the inputs to `device`; the VQA
        # model must be on the same device or generate() fails on CUDA.
        model = vqa_model.to(device)
        inputs = processor(image, question, return_tensors="pt").to(device)

        with torch.no_grad():
            generated_ids = model.generate(**inputs)
        answer = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

        st.write(f"**Answer:** {answer}")