Spaces:

Arulkumar03
/

GroundingDINO_SOTA_Zero_Shot_Model

Running

App Files Files Community

Arulkumar03 commited on Oct 17, 2023

Commit

bffd5b1

•

1 Parent(s): 148168e

Upload app.py

Browse files

Files changed (1) hide show

app.py +98 -0

app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import argparse
+import copy
+from IPython.display import display
+from PIL import Image, ImageDraw, ImageFont
+from torchvision.ops import box_convert
+# Grounding DINO
+import groundingdino.datasets.transforms as T
+from groundingdino.models import build_model
+from groundingdino.util import box_ops
+from groundingdino.util.slconfig import SLConfig
+from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
+from groundingdino.util.inference import annotate, load_image, predict
+import supervision as sv
+# segment anything
+from segment_anything import build_sam, SamPredictor
+import cv2
+import numpy as np
+import matplotlib.pyplot as plt
+# diffusers
+import PIL
+import requests
+import torch
+from io import BytesIO
+from diffusers import StableDiffusionInpaintPipeline
+from huggingface_hub import hf_hub_download
+def load_model_hf(repo_id, filename, ckpt_config_filename, device='cpu'):
+    cache_config_file = hf_hub_download(repo_id=repo_id, filename=ckpt_config_filename)
+    args = SLConfig.fromfile(cache_config_file)
+    args.device = device
+    model = build_model(args)
+    cache_file = hf_hub_download(repo_id=repo_id, filename=filename)
+    checkpoint = torch.load(cache_file, map_location=device)
+    log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
+    print("Model loaded from {} \n => {}".format(cache_file, log))
+    _ = model.eval()
+    return model
+ckpt_repo_id = "ShilongLiu/GroundingDINO"
+ckpt_filenmae = "groundingdino_swinb_cogcoor.pth"
+ckpt_config_filename = "GroundingDINO_SwinB.cfg.py"
+groundingdino_model = load_model_hf(ckpt_repo_id, ckpt_filenmae, ckpt_config_filename, device)
+checkpoint = 'sam_vit_h_4b8939.pth'
+predictor = SamPredictor(build_sam(checkpoint=checkpoint).to(device))
+# detect object using grounding DINO
+def detect(image, text_prompt, model, box_threshold = 0.3, text_threshold = 0.25):
+  boxes, logits, phrases = predict(
+      model=model,
+      image=image,
+      caption=text_prompt,
+      box_threshold=box_threshold,
+      text_threshold=text_threshold
+  )
+  annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)
+  annotated_frame = annotated_frame[...,::-1] # BGR to RGB
+  return annotated_frame, boxes
+import gradio as gr
+# Define the Gradio interface
+def detect_objects(image, text_prompt):
+    # Convert Gradio input format to the format expected by the code
+    image_array = np.array(image)
+    image_source, _ = load_image(image_array)
+    # Detect objects using grounding DINO
+    annotated_frame, detected_boxes = detect(image_array, text_prompt, groundingdino_model)
+    # Convert the annotated frame to Gradio output format
+    annotated_image = Image.fromarray(annotated_frame)
+    return annotated_image
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=detect_objects,
+    inputs=[gr.Image(), "text"],
+    outputs=gr.Image(),
+    live=True,
+    interpretation="default"
+)
+# Launch the Gradio interface
+iface.launch()