Spaces:

capjamesg
/

Grounded-SAM-2

Build error

App Files Files Community

capjamesg committed on Jul 31, 2024

Commit

7b87048

1 Parent(s): 97dbcd8

fix merge conflict

Browse files

Files changed (2) hide show

app.py +50 -19
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -1,34 +1,65 @@
-import cv2
 import gradio as gr
-import numpy as np
 import spaces
-import supervision as sv
-from autodistill.detection import CaptionOntology
-from autodistill.utils import plot
 from autodistill_grounded_sam_2 import GroundedSAM2
 @spaces.GPU
-def greet(image):
-    base_model = GroundedSAM2(
-        ontology=CaptionOntology({"container id": "container number", "logo": "logo"}),
-        model="Grounding DINO",
-        grounding_dino_box_threshold=0.25,
-    )
-    results = base_model.predict("container1.jpg").with_nms()
-    results = results[results.confidence > 0.3]
-    # print(results)
-    image = cv2.imread("container1.jpg")
     mask_annotator = sv.BoxAnnotator()
-    annotated_image = mask_annotator.annotate(image.copy(), detections=results)
     return annotated_image
-demo = gr.Interface(fn=greet, inputs="image", outputs="image")
 demo.launch()

 import gradio as gr
 import spaces
 from autodistill_grounded_sam_2 import GroundedSAM2
+from autodistill_grounded_sam_2.helpers import combine_detections
+from autodistill.helpers import load_image
+import torch
+from autodistill.detection import CaptionOntology
+import supervision as sv
+import numpy as np
+# Single model instance shared by every request. The ontology is left empty
+# because greet() passes the class names taken from the user's prompt
+# directly to the Grounding DINO predictor on each call.
+base_model = GroundedSAM2(
+    ontology=CaptionOntology({}),
+    model="Grounding DINO",
+    grounding_dino_box_threshold=0.25,
+)
 @spaces.GPU
+def greet(image, prompt):
+    """Detect the prompted objects in an image, segment them, and draw boxes.
+
+    Args:
+        image: The image supplied by the Gradio image input. It is passed to
+            autodistill's ``load_image`` with ``return_format="cv2"``.
+        prompt: Comma-separated class descriptions, e.g. ``"person, dog"``.
+            Each non-empty description is sent to Grounding DINO as its own
+            class. Ignored when ``base_model.model`` is ``"Florence 2"``.
+
+    Returns:
+        A copy of the loaded image annotated with every detection whose
+        confidence is above 0.3.
+
+    Raises:
+        ValueError: If the prompt contains no class description, or if
+            ``base_model.model`` is not one of the two supported detectors.
+    """
+    # Bug fix: the original passed the builtin ``input`` here instead of the
+    # ``image`` argument.
+    image = load_image(image, return_format="cv2")
+    if base_model.model == "Florence 2":
+        detections = base_model.florence_2_predictor.predict(image)
+    elif base_model.model == "Grounding DINO":
+        # Strip whitespace and drop empty entries so "cat, dog," yields
+        # ["cat", "dog"] rather than ["cat", " dog", ""].
+        descriptions = [
+            part.strip() for part in (prompt or "").split(",") if part.strip()
+        ]
+        if not descriptions:
+            raise ValueError(
+                "prompt must contain at least one class description"
+            )
+        # One predictor call per description; the per-class results are
+        # merged below with class ids 0..n-1 in prompt order.
+        detections_list = [
+            base_model.grounding_dino_model.predict_with_classes(
+                image=image,
+                classes=[description],
+                box_threshold=base_model.grounding_dino_box_threshold,
+                text_threshold=base_model.grounding_dino_text_threshold,
+            )
+            for description in descriptions
+        ]
+        detections = combine_detections(
+            detections_list, overwrite_class_ids=range(len(detections_list))
+        )
+    else:
+        # Previously fell through and failed later with an unbound
+        # ``detections``; fail early with a clear message instead.
+        raise ValueError(f"Unsupported detection model: {base_model.model!r}")
+    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
+        base_model.sam_2_predictor.set_image(image)
+        result_masks = []
+        for box in detections.xyxy:
+            masks, scores, _ = base_model.sam_2_predictor.predict(
+                box=box, multimask_output=False
+            )
+            # Keep only the highest-scoring mask returned for this box.
+            result_masks.append(masks.astype(bool)[np.argmax(scores)])
+    # Only attach masks when there is at least one box; an empty list would
+    # become a 1-D array, which presumably is not a valid mask stack for
+    # supervision -- TODO confirm.
+    if result_masks:
+        detections.mask = np.array(result_masks)
+    # Bug fix: the original filtered ``results`` before it was ever assigned.
+    results = detections[detections.confidence > 0.3]
     mask_annotator = sv.BoxAnnotator()
+    annotated_image = mask_annotator.annotate(
+        image.copy(), detections=results
+    )
     return annotated_image
+# Two inputs (image + comma-separated prompt) mapped onto greet(image, prompt).
+# Components come from the top-level gradio namespace: the legacy
+# ``gr.inputs`` module is not available in current Gradio releases.
+demo = gr.Interface(
+    fn=greet,
+    inputs=[gr.Image(), gr.Textbox(lines=2, label="Prompt")],
+    outputs="image",
+)
 demo.launch()

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 torch
 autodistill
-numpy>=1.20.0
 opencv-python>=4.6.0
 supervision
 roboflow

 torch
 autodistill
+numpy==1.20.0
 opencv-python>=4.6.0
 supervision
 roboflow