Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -1,78 +1,81 @@
 import streamlit as st
 import cv2
-import skimage
 import numpy as np
 from PIL import Image
 import torch
 import matplotlib.pyplot as plt
 from transformers import OwlViTProcessor, OwlViTForObjectDetection
 from transformers.image_utils import ImageFeatureExtractionMixin
-import requests
 
-
+st.set_option('deprecation.showfileUploaderEncoding', False)
+
 model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
 processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
 device = torch.device("cpu")
 model = model.to(device)
 model.eval()
 
-
-st.set_page_config(layout="wide")
-title = """<h1 id="title">Zero-shot Object Detection</h1>"""
-st.markdown(title, unsafe_allow_html=True)
-col1, col2 = st.beta_columns(2)
+st.title('Zero-shot Object Detection')
 
-#
+# Input image and query image upload
+col1, col2 = st.beta_columns(2)
 with col1:
-    st.
-    uploaded_image
+    uploaded_image = st.file_uploader("Upload input image", type=["jpg", "jpeg", "png"])
+    if uploaded_image is not None:
+        image = Image.open(uploaded_image)
+        st.image(image, caption='Input Image', use_column_width=True)
 
-# Upload the query image
 with col2:
-    st.
-
-
-
+    uploaded_query = st.file_uploader("Upload query image", type=["jpg", "jpeg", "png"])
+    if uploaded_query is not None:
+        query_image = Image.open(uploaded_query)
+        st.image(query_image, caption='Query Image', use_column_width=True)
 
-#
-threshold_ratio = st.slider(
+# Threshold ratio bar and class name input
+threshold_ratio = st.slider('Select threshold ratio:', min_value=0.0, max_value=1.0, step=0.1, value=0.6)
+class_name = st.text_input('Enter class name:', value='agumon')
 
-
-
-
-# Process the input and query images
-if uploaded_image is not None:
-    image = Image.open(uploaded_image)
+if uploaded_image is not None and uploaded_query is not None:
+    # Process input and query image
+    text_queries = [class_name]
     target_sizes = torch.Tensor([image.size[::-1]])
-    inputs = processor(images=image, query_images=query_image,
+    inputs = processor(images=image, query_images=query_image, return_tensors="pt").to(device)
 
-    # Run the model
     with torch.no_grad():
        outputs = model.image_guided_detection(**inputs)
 
-    # Post-process the results
    img = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
    outputs.logits = outputs.logits.cpu()
    outputs.target_pred_boxes = outputs.target_pred_boxes.cpu()
+
    results = processor.post_process_image_guided_detection(outputs=outputs, threshold=threshold_ratio, nms_threshold=0.3, target_sizes=target_sizes)
    boxes, scores = results[0]["boxes"], results[0]["scores"]
 
-    # Draw
+    # Draw predicted bounding boxes and text
    for box, score in zip(boxes, scores):
        box = [int(i) for i in box.tolist()]
-        cx,
-        img = cv2.rectangle(img, box[:2], box[2:], (255,
+        cx, cy, x, y = box
+        img = cv2.rectangle(img, box[:2], box[2:], (255, 0, 0), 5)
        if box[3] + 25 > 768:
            y = box[3] - 10
        else:
            y = box[3] + 25
 
        plt.imshow(img[:, :, ::-1])
-        plt.text(
+        plt.text(
+            cx,
+            cy,
+            class_name + str(round(score.tolist(), 2)),
+            ha="left",
+            va="top",
+            color="red",
+            bbox={
+                "facecolor": "white",
+                "edgecolor": "red",
+                "boxstyle": "square,pad=.3"
+            })
 
-
-    output_image = plt.gcf().canvas.tostring_rgb()
-    plt.clf()
+    output_image = img[:, :, ::-1]
    st.image(output_image, caption='Predicted Image', use_column_width=True)
 
 else:
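
A likely factor in the "Runtime error" badge at the top of this page: the committed file still uses two pre-1.0 Streamlit APIs. st.beta_columns was removed from later Streamlit releases in favor of st.columns, and the deprecation.showfileUploaderEncoding config option was dropped, so the st.set_option call for it can itself raise an exception. A minimal sketch of the modern equivalents, assuming the Space runs a current Streamlit release:

# st.set_option('deprecation.showfileUploaderEncoding', False)  # option removed; delete this line on modern Streamlit
col1, col2 = st.columns(2)  # stable replacement for st.beta_columns(2)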
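
One functional quirk survives the commit: the bounding box is drawn into the OpenCV array img, but the score label is drawn with plt.text onto a matplotlib figure that is never rasterized back into an array (the old code attempted that via plt.gcf().canvas.tostring_rgb(), which this commit removes). Since output_image is now the raw array, the labels will not appear in the image Streamlit displays. A sketch that keeps the annotation entirely in OpenCV, assuming img is the BGR array and box the [x1, y1, x2, y2] list from the loop above:

x1, y1, x2, y2 = box
cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 5)   # red box (OpenCV uses BGR channel order)
label = class_name + " " + str(round(score.item(), 2))   # e.g. "agumon 0.87"
cv2.putText(img, label, (x1, max(y1 - 10, 20)),          # clamp so the text stays inside the frame
            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 0, 255), 2)
output_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)      # convert once to RGB for st.image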
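
For reference, the detection flow the app wires into Streamlit can be exercised as a standalone script. This is a minimal sketch of the same transformers API calls used in the diff; the file names input.jpg and query.jpg are placeholder paths, and the 0.6 threshold mirrors the slider's default:

import torch
from PIL import Image
from transformers import OwlViTProcessor, OwlViTForObjectDetection

processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
model.eval()

image = Image.open("input.jpg").convert("RGB")        # target image to search
query_image = Image.open("query.jpg").convert("RGB")  # exemplar of the object to find

# The processor resizes and normalizes both images and returns PyTorch tensors.
inputs = processor(images=image, query_images=query_image, return_tensors="pt")

with torch.no_grad():
    outputs = model.image_guided_detection(**inputs)

# target_sizes is (height, width) so boxes are rescaled to the original image.
target_sizes = torch.Tensor([image.size[::-1]])
results = processor.post_process_image_guided_detection(
    outputs=outputs, threshold=0.6, nms_threshold=0.3, target_sizes=target_sizes
)
for box, score in zip(results[0]["boxes"], results[0]["scores"]):
    print([round(v, 1) for v in box.tolist()], round(score.item(), 3))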