adirik committed
Commit ed45e61 · 1 Parent(s): 3ea7a3d

create app

.DS_Store ADDED
Binary file (6.15 kB)
 
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-title: Image Guided Owlvit
-emoji: 🌖
+title: Image-Guided OWL-ViT Demo
+emoji: 🔥
 colorFrom: yellow
-colorTo: pink
+colorTo: yellow
 sdk: gradio
-sdk_version: 3.10.1
+sdk_version: 3.1.3
 app_file: app.py
 pinned: false
 license: apache-2.0
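The YAML front matter above is the Space's configuration card: it sets the display title and thumbnail emoji, the card gradient colors, the Gradio SDK version the Space runs on, and the entry-point file (app.py).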
app.py ADDED
@@ -0,0 +1,77 @@
+import torch
+import cv2
+import gradio as gr
+import numpy as np
+from transformers import OwlViTProcessor, OwlViTForObjectDetection
+
+
+# Use GPU if available
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+
+model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32").to(device)
+model.eval()
+processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
+
+
+def image_guided_detection(img, query_img, score_threshold, nms_threshold):
+    target_sizes = torch.Tensor([img.shape[:2]])
+    inputs = processor(query_images=query_img, images=img, return_tensors="pt").to(device)
+
+    with torch.no_grad():
+        outputs = model.image_guided_detection(**inputs)
+
+    # Post-processing reads pred_boxes, so copy the target boxes over
+    outputs.logits = outputs.logits.cpu()
+    outputs.pred_boxes = outputs.target_pred_boxes.cpu()
+    results = processor.post_process_image_guided_detection(
+        outputs=outputs,
+        threshold=score_threshold,
+        nms_threshold=nms_threshold,
+        target_sizes=target_sizes
+    )
+
+    boxes, scores = results[0]["boxes"], results[0]["scores"]
+
+    # Draw the boxes that clear the score threshold onto the target image
+    for box, score in zip(boxes, scores):
+        box = [int(i) for i in box.tolist()]
+
+        if score >= score_threshold:
+            img = cv2.rectangle(img, tuple(box[:2]), tuple(box[2:]), (255, 0, 0), 5)
+            # Place the score label below the box, or above it if the box
+            # sits too close to the bottom edge of the 768 px model input
+            if box[3] + 25 > 768:
+                y = box[3] - 10
+            else:
+                y = box[3] + 25
+            img = cv2.putText(
+                img, f"{score.item():.2f}", (box[0], y),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2
+            )
+    return img
+
+
+description = """
+Gradio demo for image-guided / one-shot object detection with
+<a href="https://huggingface.co/docs/transformers/main/en/model_doc/owlvit">OWL-ViT</a>,
+introduced in <a href="https://arxiv.org/abs/2205.06230">Simple Open-Vocabulary Object Detection
+with Vision Transformers</a>.
+
+\n\nYou can use OWL-ViT to query images with text descriptions of any object or, alternatively, with an
+example / query image of the target object. To use it, simply upload an image and a query image that only contains the object
+you're looking for. You can also use the score and non-maximum suppression threshold sliders to filter out
+low-probability and overlapping bounding box predictions.
+
+\n\nFor an in-depth tutorial on how to use OWL-ViT with transformers, check out our
+<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb">Colab notebook</a>
+and our HF Spaces <a href="https://huggingface.co/spaces/adirik/OWL-ViT">demo</a> for zero-shot / text-guided object detection.
+"""
+
+demo = gr.Interface(
+    image_guided_detection,
+    inputs=[
+        gr.Image(label="Target image"),
+        gr.Image(label="Query image"),
+        gr.Slider(0, 1, value=0.6, label="Score threshold"),
+        gr.Slider(0, 1, value=0.3, label="NMS threshold"),
+    ],
+    outputs="image",
+    title="Image-Guided Object Detection with OWL-ViT",
+    description=description,
+    examples=[
+        ["assets/image2.jpeg", "assets/query2.jpeg", 0.7, 0.3],
+        ["assets/image1.jpeg", "assets/query1.jpeg", 0.6, 0.3]
+    ]
+)
+
+demo.launch()
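For reference, the same detection flow can be exercised without the Gradio wrapper. Below is a minimal sketch, assuming the bundled example assets and the same transformers image-guided detection API used in app.py; the thresholds shown are the demo's defaults, not canonical values:

import cv2
import torch
from transformers import OwlViTProcessor, OwlViTForObjectDetection

model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
model.eval()
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")

# OpenCV loads images as BGR; the model expects RGB, so convert first
img = cv2.cvtColor(cv2.imread("assets/image1.jpeg"), cv2.COLOR_BGR2RGB)
query = cv2.cvtColor(cv2.imread("assets/query1.jpeg"), cv2.COLOR_BGR2RGB)

inputs = processor(query_images=query, images=img, return_tensors="pt")
with torch.no_grad():
    outputs = model.image_guided_detection(**inputs)

# Post-processing expects the target-image boxes under pred_boxes
outputs.logits = outputs.logits.cpu()
outputs.pred_boxes = outputs.target_pred_boxes.cpu()
results = processor.post_process_image_guided_detection(
    outputs=outputs, threshold=0.6, nms_threshold=0.3,
    target_sizes=torch.Tensor([img.shape[:2]])
)
print(results[0]["boxes"], results[0]["scores"])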
assets/.DS_Store ADDED
Binary file (6.15 kB)
 
assets/image1.jpeg ADDED
assets/image2.jpeg ADDED
assets/query1.jpeg ADDED
assets/query2.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,7 @@
+# pip install -r requirements.txt
+
+numpy>=1.18.5
+torch>=1.7.0
+torchvision>=0.8.1
+git+https://github.com/huggingface/transformers.git
+opencv-python
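To run the demo locally, the standard Gradio workflow should apply: pip install -r requirements.txt, then python app.py, after which Gradio prints a local URL. Note that transformers is installed from the main branch rather than PyPI, presumably because OWL-ViT's image-guided detection API was newer than the latest release at the time of this commit.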