KB-VQA-E / app.py
m7mdal7aj's picture
Update app.py
fdc69a0 verified
raw
history blame
4 kB
import streamlit as st
import torch
import bitsandbytes
import accelerate
import scipy
from PIL import Image
import torch.nn as nn
from transformers import Blip2Processor, Blip2ForConditionalGeneration, InstructBlipProcessor, InstructBlipForConditionalGeneration
from my_model.object_detection import detect_and_draw_objects
from my_model.captioner.image_captioning import get_caption
from my_model.utilities import free_gpu_resources
def answer_question(image, question, model, processor):
    """Generate a free-text answer to a question about an image.

    Args:
        image: A path or file-like object that ``Image.open`` can read.
        question (str): The question to ask about the image.
        model: The generation model; may be wrapped in
            ``torch.nn.DataParallel``.
        processor: Callable that encodes the image/question pair and also
            decodes the generated token ids.

    Returns:
        str: The decoded first generated sequence, with special tokens
        skipped and surrounding whitespace stripped.
    """
    pil_image = Image.open(image)
    # The encoded inputs are moved to "cuda" as float16, so a CUDA device
    # must be available when this function runs.
    encoded = processor(pil_image, question, return_tensors="pt").to("cuda", torch.float16)

    # A DataParallel wrapper holds the original model in its 'module' attribute.
    generator = model.module if isinstance(model, torch.nn.DataParallel) else model
    generated_ids = generator.generate(**encoded, max_length=100, min_length=20)

    return processor.decode(generated_ids[0], skip_special_tokens=True).strip()
# Page heading
st.title("Image Question Answering")

# Upload widget for the image, restricted to PNG/JPEG extensions
image = st.file_uploader(label="Upload an image", type=["png", "jpg", "jpeg"])

# Free-text question about the uploaded image
question = st.text_input(label="Enter your question about the image:")
# Caption generation, triggered by the 'Generate Caption' button.
if st.button('Generate Caption'):
    # NOTE(review): free_gpu_resources presumably releases cached GPU memory
    # before the captioner runs — confirm against my_model.utilities.
    free_gpu_resources()
    if image is not None:
        # Display the image
        st.image(image, use_column_width=True)
        try:
            caption = get_caption(image)
            st.write(caption)
        finally:
            # Run the cleanup even when captioning raises, so a failed run
            # does not skip the post-caption free_gpu_resources() call.
            free_gpu_resources()
    else:
        # Captioning only needs an image; do not ask for a question here.
        st.write("Please upload an image to generate a caption.")
# Question answering, triggered by the "Get Answer" button.
if st.button("Get Answer"):
    if image is None or not question:
        # Both an uploaded image and a non-empty question are required.
        st.write("Please upload an image and enter a question.")
    else:
        # Show the uploaded image above the answer
        st.image(image, use_column_width=True)
        # NOTE(review): load_caption_model is neither defined nor imported in
        # the visible part of this file — confirm where it comes from.
        model, processor = load_caption_model()
        # Run the model on the image/question pair and show the result
        answer = answer_question(image, question, model, processor)
        st.write(answer)
# Object detection controls, rendered in the sidebar
st.sidebar.title("Object Detection")

# Model picker
detect_model = st.sidebar.selectbox(
    "Choose a model for object detection:",
    ["detic", "yolov5"],
)

# The slider's initial value depends on the chosen model:
# 0.2 for "yolov5", 0.4 for anything else.
if detect_model == "yolov5":
    _default_threshold = 0.2
else:
    _default_threshold = 0.4
threshold = st.sidebar.slider(
    "Select Detection Threshold",
    min_value=0.1,
    max_value=0.9,
    value=_default_threshold,
)

# Sidebar button that starts object detection
detect_button = st.sidebar.button("Detect Objects")
def perform_object_detection(image, model_name, threshold):
    """
    Run object detection on an image by delegating to detect_and_draw_objects.

    Args:
        image (PIL.Image): The image to run detection on.
        model_name (str): Name of the object detection model to use.
        threshold (float): Detection threshold forwarded to the detector.

    Returns:
        tuple: The two values produced by detect_and_draw_objects, in order.
        The original documentation describes them as the image with drawn
        bounding boxes (PIL.Image) and a string of detected objects.
    """
    # Forward all arguments unchanged; the detector yields exactly two values.
    annotated_image, object_summary = detect_and_draw_objects(image, model_name, threshold)
    return annotated_image, object_summary
# Object detection, triggered by the 'Detect Objects' sidebar button.
if detect_button:
    if image is None:
        st.write("Please upload an image for object detection.")
    else:
        # Stage 1: open and display the upload. Failures here really are
        # image-loading problems, so the message says so.
        try:
            # Bound to a new name so the module-level `image` keeps referring
            # to the uploaded file rather than being replaced by a PIL image.
            uploaded_image = Image.open(image)
            # Display the original image
            st.image(uploaded_image, use_column_width=True, caption="Original Image")
        except Exception as e:
            st.error(f"Error loading image: {e}")
        else:
            # Stage 2: detection and display of results. Reported separately
            # so a detector failure is not mislabelled as a loading error.
            try:
                processed_image, detected_objects = perform_object_detection(
                    uploaded_image, detect_model, threshold
                )
                # Display the image with detected objects
                st.image(processed_image, use_column_width=True, caption="Image with Detected Objects")
                # Display the detected objects as text
                st.write(detected_objects)
            except Exception as e:
                st.error(f"Error during object detection: {e}")