adirik committed
Commit ed45e61 · 1 Parent(s): 3ea7a3d

create app

.DS_Store ADDED
Binary file (6.15 kB)
 
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-title: Image Guided Owlvit
-emoji: 🌖
+title: Image-Guided OWL-ViT Demo
+emoji: 🔥
 colorFrom: yellow
-colorTo: pink
+colorTo: yellow
 sdk: gradio
-sdk_version: 3.10.1
+sdk_version: 3.1.3
 app_file: app.py
 pinned: false
 license: apache-2.0
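The YAML front matter above is the Space's configuration card: it sets the display title and thumbnail emoji, the card gradient colors, the Gradio SDK version the Space runs on, and the entry-point file (app.py).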
app.py ADDED
@@ -0,0 +1,77 @@
+import torch
+import cv2
+import gradio as gr
+import numpy as np
+from transformers import OwlViTProcessor, OwlViTForObjectDetection
+
+
+# Use GPU if available
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+else:
+    device = torch.device("cpu")
+
+model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32").to(device)
+model.eval()
+processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
+
+
+def image_guided_detection(img, query_img, score_threshold, nms_threshold):
+    target_sizes = torch.Tensor([img.shape[:2]])
+    inputs = processor(query_images=query_img, images=img, return_tensors="pt").to(device)
+
+    with torch.no_grad():
+        outputs = model.image_guided_detection(**inputs)
+
+    # Post-processing reads pred_boxes, so copy the target boxes over
+    outputs.logits = outputs.logits.cpu()
+    outputs.pred_boxes = outputs.target_pred_boxes.cpu()
+    results = processor.post_process_image_guided_detection(
+        outputs=outputs,
+        threshold=score_threshold,
+        nms_threshold=nms_threshold,
+        target_sizes=target_sizes
+    )
+
+    boxes, scores = results[0]["boxes"], results[0]["scores"]
+
+    # Draw the boxes that clear the score threshold onto the target image
+    for box, score in zip(boxes, scores):
+        box = [int(i) for i in box.tolist()]
+
+        if score >= score_threshold:
+            img = cv2.rectangle(img, tuple(box[:2]), tuple(box[2:]), (255, 0, 0), 5)
+            # Place the score label below the box, or above it if the box
+            # sits too close to the bottom edge of the 768 px model input
+            if box[3] + 25 > 768:
+                y = box[3] - 10
+            else:
+                y = box[3] + 25
+            img = cv2.putText(
+                img, f"{score.item():.2f}", (box[0], y),
+                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2
+            )
+    return img
+
+
+description = """
+Gradio demo for image-guided / one-shot object detection with
+<a href="https://huggingface.co/docs/transformers/main/en/model_doc/owlvit">OWL-ViT</a>,
+introduced in <a href="https://arxiv.org/abs/2205.06230">Simple Open-Vocabulary Object Detection
+with Vision Transformers</a>.
+
+\n\nYou can use OWL-ViT to query images with text descriptions of any object or, alternatively, with an
+example / query image of the target object. To use it, simply upload an image and a query image that only contains the object
+you're looking for. You can also use the score and non-maximum suppression threshold sliders to filter out
+low-probability and overlapping bounding box predictions.
+
+\n\nFor an in-depth tutorial on how to use OWL-ViT with transformers, check out our
+<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb">Colab notebook</a>
+and our HF Spaces <a href="https://huggingface.co/spaces/adirik/OWL-ViT">demo</a> for zero-shot / text-guided object detection.
+"""
+
+demo = gr.Interface(
+    image_guided_detection,
+    inputs=[
+        gr.Image(label="Target image"),
+        gr.Image(label="Query image"),
+        gr.Slider(0, 1, value=0.6, label="Score threshold"),
+        gr.Slider(0, 1, value=0.3, label="NMS threshold"),
+    ],
+    outputs="image",
+    title="Image-Guided Object Detection with OWL-ViT",
+    description=description,
+    examples=[
+        ["assets/image2.jpeg", "assets/query2.jpeg", 0.7, 0.3],
+        ["assets/image1.jpeg", "assets/query1.jpeg", 0.6, 0.3]
+    ]
+)
+
+demo.launch()
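For reference, the same detection flow can be exercised without the Gradio wrapper. Below is a minimal sketch, assuming the bundled example assets and the same transformers image-guided detection API used in app.py; the thresholds shown are the demo's defaults, not canonical values:

import cv2
import torch
from transformers import OwlViTProcessor, OwlViTForObjectDetection

model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32")
model.eval()
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")

# OpenCV loads images as BGR; the model expects RGB, so convert first
img = cv2.cvtColor(cv2.imread("assets/image1.jpeg"), cv2.COLOR_BGR2RGB)
query = cv2.cvtColor(cv2.imread("assets/query1.jpeg"), cv2.COLOR_BGR2RGB)

inputs = processor(query_images=query, images=img, return_tensors="pt")
with torch.no_grad():
    outputs = model.image_guided_detection(**inputs)

# Post-processing expects the target-image boxes under pred_boxes
outputs.logits = outputs.logits.cpu()
outputs.pred_boxes = outputs.target_pred_boxes.cpu()
results = processor.post_process_image_guided_detection(
    outputs=outputs, threshold=0.6, nms_threshold=0.3,
    target_sizes=torch.Tensor([img.shape[:2]])
)
print(results[0]["boxes"], results[0]["scores"])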
assets/.DS_Store ADDED
Binary file (6.15 kB)
 
assets/image1.jpeg ADDED
assets/image2.jpeg ADDED
assets/query1.jpeg ADDED
assets/query2.jpeg ADDED
requirements.txt ADDED
@@ -0,0 +1,7 @@
+# pip install -r requirements.txt
+
+numpy>=1.18.5
+torch>=1.7.0
+torchvision>=0.8.1
+git+https://github.com/huggingface/transformers.git
+opencv-python
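To run the demo locally, the standard Gradio workflow should apply: pip install -r requirements.txt, then python app.py, after which Gradio prints a local URL. Note that transformers is installed from the main branch rather than PyPI, presumably because OWL-ViT's image-guided detection API was newer than the latest release at the time of this commit.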