adirik committed
Commit bc5dfe0
1 Parent(s): dff85a2

fix styling

Files changed (3):
  1. .DS_Store +0 -0
  2. app.py +15 -9
  3. requirements.txt +2 -1
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -1,4 +1,5 @@
 import torch
+import cv2
 import gradio as gr
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
@@ -19,19 +20,24 @@ def query_image(img, text_queries):
     results = processor.post_process(outputs=outputs, target_sizes=target_sizes)
     boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"]
 
-    draw = ImageDraw.Draw(img)
-    font = ImageFont.truetype("assets/Helvetica.ttf", size=22)
-
-    score_threshold = 0.1
+    img = cv2.resize(img, (768, 768), interpolation = cv2.INTER_AREA)
+    score_threshold = 0.11
+
+    font = cv2.FONT_HERSHEY_SIMPLEX
+
     for box, score, label in zip(boxes, scores, labels):
         box = [int(i) for i in box.tolist()]
 
         if score >= score_threshold:
-            draw.rectangle(box, outline="red", width=4)
-            text_loc =[box[0]+5, box[3]+10]
-            draw.text(text_loc, text_queries[label], fill="red", font=font, stroke_width=1)
-
-    img = np.array(img)
+            img = cv2.rectangle(img, box[:2], box[2:], (255,0,0), 5)
+            if box[3] + 25 > 768:
+                y = box[3] - 10
+            else:
+                y = box[3] + 25
+
+            img = cv2.putText(
+                img, text_queries[label], (box[0], y), font, 1, (255,0,0), 2, cv2.LINE_AA
+            )
     return img
 
 
@@ -45,7 +51,7 @@ To use it, simply upload an image and enter comma separated text descriptions of
 """
 demo = gr.Interface(
     query_image,
-    inputs=[gr.Image(shape=(768, 768), type="pil"), "text"],
+    inputs=[gr.Image(shape=(768, 768)), "text"],
     outputs="image",
     title="Zero-Shot Object Detection with OWL-ViT",
     description=description,
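
For reference, the new cv2-based drawing path assembles into roughly the following self-contained sketch. The name draw_predictions is illustrative and not a function in the repo; it assumes img arrives from Gradio as an RGB uint8 numpy array and that boxes, scores and labels are the tensors returned by processor.post_process.

import cv2

def draw_predictions(img, boxes, scores, labels, text_queries, score_threshold=0.11):
    # Hypothetical helper mirroring the drawing logic this commit introduces.
    # Resize to the fixed 768x768 canvas that the label offsets below assume.
    img = cv2.resize(img, (768, 768), interpolation=cv2.INTER_AREA)
    font = cv2.FONT_HERSHEY_SIMPLEX

    for box, score, label in zip(boxes, scores, labels):
        box = [int(i) for i in box.tolist()]
        if score < score_threshold:
            continue
        # Red outline; Gradio hands over RGB arrays, so (255, 0, 0) is red without a BGR swap.
        img = cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 5)
        # Place the label just below the box, or above it if it would run off the bottom edge.
        y = box[3] - 10 if box[3] + 25 > 768 else box[3] + 25
        img = cv2.putText(
            img, text_queries[label], (box[0], y), font, 1, (255, 0, 0), 2, cv2.LINE_AA
        )
    return img

The label placement check is the reason the resize to a fixed 768x768 happens before drawing: a caption is nudged above its box whenever box[3] + 25 would fall outside the canvas.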
requirements.txt CHANGED
@@ -4,4 +4,5 @@ numpy>=1.18.5
 torch>=1.7.0
 torchvision>=0.8.1
 Pillow
-transformers
+transformers
+opencv-python
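
With opencv-python and transformers now listed in requirements.txt, the interface side of the change can be exercised on its own. The sketch below is a minimal stand-in, not the Space's app.py: query_image is stubbed out, the description string is abridged, and gr.Image(shape=...) relies on the Gradio 3.x API.

import gradio as gr

def query_image(img, text_queries):
    # Stand-in for the detection function above; it just echoes the input so the
    # interface wiring can be smoke-tested without downloading OWL-ViT weights.
    return img

demo = gr.Interface(
    query_image,
    # shape= resizes uploads to 768x768 (a Gradio 3.x argument, removed in later releases);
    # with type="pil" dropped, the function now receives a numpy array, matching the cv2 calls.
    inputs=[gr.Image(shape=(768, 768)), "text"],
    outputs="image",
    title="Zero-Shot Object Detection with OWL-ViT",
    description="Upload an image and enter comma separated text queries.",
)

if __name__ == "__main__":
    demo.launch()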