adirik committed
Commit ba97523
1 Parent(s): 9808945

add slider

Files changed (1)
  1. app.py +7 -6
app.py CHANGED
@@ -16,7 +16,8 @@ model.eval()
 processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")
 
 
-def query_image(img, text_queries):
+def query_image(img, text_queries, score_threshold):
+    text_queries = text_queries
     text_queries = text_queries.split(",")
     inputs = processor(text=text_queries, images=img, return_tensors="pt").to(device)
 
@@ -30,8 +31,6 @@ def query_image(img, text_queries):
     boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"]
 
     img = cv2.resize(img, (768, 768), interpolation = cv2.INTER_AREA)
-    score_threshold = 0.11
-
     font = cv2.FONT_HERSHEY_SIMPLEX
 
     for box, score, label in zip(boxes, scores, labels):
@@ -55,15 +54,17 @@ Gradio demo for <a href="https://huggingface.co/docs/transformers/main/en/model_
 introduced in <a href="https://arxiv.org/abs/2205.06230">Simple Open-Vocabulary Object Detection
 with Vision Transformers</a>.
 \n\nYou can use OWL-ViT to query images with text descriptions of any object.
-To use it, simply upload an image and enter comma separated text descriptions of objects you want to query the image for.
+To use it, simply upload an image and enter comma separated text descriptions of objects you want to query the image for. You
+can also use the score threshold slider to set a threshold to filter out low probability predictions.
 \n\n<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/zeroshot_object_detection_with_owlvit.ipynb">Colab demo</a>
 """
 demo = gr.Interface(
     query_image,
-    inputs=[gr.Image(shape=(768, 768)), "text"],
+    inputs=[gr.Image(shape=(768, 768)), "text", gr.Slider(0, 1, value=0.1)],
     outputs="image",
     title="Zero-Shot Object Detection with OWL-ViT",
     description=description,
-    examples=[["assets/astronaut.png", "human face, rocket, flag, nasa badge"], ["assets/coffee.png", "coffee mug, spoon, plate"]]
+    examples=[["assets/astronaut.png", "human face, rocket, flag, nasa badge"], ["assets/coffee.png", "coffee mug, spoon, plate"]],
+    live=True
 )
 demo.launch(debug=True)
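For context, here is a minimal sketch of how the new score_threshold argument presumably flows from the gr.Slider into the drawing loop. The diff only shows the changed lines; the model forward pass, the post-processing call, and the body of the for box, score, label loop are reconstructed assumptions around those lines, not the Space's exact app.py.

# Minimal sketch, assuming the unshown parts of app.py: the forward pass,
# post-processing, and drawing-loop body below are reconstructions, not the
# committed code.
import cv2
import torch
import gradio as gr
from transformers import OwlViTProcessor, OwlViTForObjectDetection

device = "cuda" if torch.cuda.is_available() else "cpu"
model = OwlViTForObjectDetection.from_pretrained("google/owlvit-base-patch32").to(device)
model.eval()
processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32")


def query_image(img, text_queries, score_threshold):
    text_queries = text_queries.split(",")
    inputs = processor(text=text_queries, images=img, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Assumed post-processing: move predictions to CPU and rescale boxes to the
    # input image size. Newer transformers releases expose
    # post_process_object_detection for the same purpose.
    outputs.logits = outputs.logits.cpu()
    outputs.pred_boxes = outputs.pred_boxes.cpu()
    target_sizes = torch.Tensor([img.shape[:2]])
    results = processor.post_process(outputs=outputs, target_sizes=target_sizes)
    boxes, scores, labels = results[0]["boxes"], results[0]["scores"], results[0]["labels"]

    img = cv2.resize(img, (768, 768), interpolation=cv2.INTER_AREA)
    font = cv2.FONT_HERSHEY_SIMPLEX

    for box, score, label in zip(boxes, scores, labels):
        # The slider value replaces the old hard-coded score_threshold = 0.11.
        if score < score_threshold:
            continue
        x1, y1, x2, y2 = [int(v) for v in box.tolist()]
        img = cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
        img = cv2.putText(img, text_queries[int(label)].strip(), (x1, y1 - 10),
                          font, 0.6, (255, 0, 0), 2)
    return img


demo = gr.Interface(
    query_image,
    inputs=[gr.Image(shape=(768, 768)), "text", gr.Slider(0, 1, value=0.1)],
    outputs="image",
    live=True,
)
demo.launch()

With live=True the interface re-runs query_image whenever an input changes, so dragging the threshold slider redraws the boxes immediately instead of waiting for a Submit click.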