prithivMLmods committed (verified)
Commit e933213 · 1 Parent(s): 05bb57b

Update app.py

Files changed (1): app.py (+81 -267)
app.py CHANGED
@@ -1,98 +1,15 @@
 import os
-import sys
-import subprocess
 import spaces
 import gradio as gr
 import numpy as np
 import torch
-import cv2
-import tempfile
-import shutil
-import glob
-from PIL import Image
+import random
+from PIL import Image, ImageDraw
 from typing import Iterable
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
+from transformers import Sam3Processor, Sam3Model
 
-# ---------------------------------------------------------
-# 1. ENVIRONMENT SETUP & REPO CLONING
-# ---------------------------------------------------------
-REPO_URL = "https://github.com/facebookresearch/sam-3d-body.git"
-REPO_DIR = "sam-3d-body"
-
-def setup_sam_3d_env():
-    """
-    Clones the repo, installs dependencies, and fixes sys.path
-    so that 'utils', 'tools', and 'sam_3d_body' can be imported.
-    """
-    # 1. Clone if it does not exist
-    if not os.path.exists(REPO_DIR):
-        print(f"Cloning SAM 3D Body repository from {REPO_URL}...")
-        try:
-            subprocess.run(["git", "clone", REPO_URL], check=True)
-            print("Installing sam-3d-body package in editable mode...")
-            # We install using pip to resolve internal package dependencies
-            subprocess.run([sys.executable, "-m", "pip", "install", "-e", REPO_DIR], check=True)
-
-            # Install other requirements that are usually needed
-            subprocess.run([sys.executable, "-m", "pip", "install", "trimesh", "opencv-python", "matplotlib"], check=True)
-        except subprocess.CalledProcessError as e:
-            print(f"Error during setup: {e}")
-            return False
-
-    # 2. Add critical paths to sys.path
-    repo_abs_path = os.path.abspath(REPO_DIR)
-    notebook_path = os.path.join(repo_abs_path, "notebook")
-
-    # CRITICAL: add the repo root first so 'import tools' and 'import sam_3d_body' work inside utils.py
-    if repo_abs_path not in sys.path:
-        sys.path.insert(0, repo_abs_path)
-        print(f"Added to sys.path: {repo_abs_path}")
-
-    # Add the notebook folder so we can 'import utils'
-    if notebook_path not in sys.path:
-        sys.path.insert(0, notebook_path)
-        print(f"Added to sys.path: {notebook_path}")
-
-    return True
-
-# Run setup immediately
-env_ready = setup_sam_3d_env()
-
-# ---------------------------------------------------------
-# 2. IMPORTS
-# ---------------------------------------------------------
-
-# --- Import SAM3 (segmentation) ---
-try:
-    from transformers import Sam3Processor, Sam3Model
-    SAM3_AVAILABLE = True
-except ImportError:
-    print("Warning: transformers library not found or outdated. SAM3 will be disabled.")
-    SAM3_AVAILABLE = False
-
-# --- Import SAM 3D Body utils ---
-# We use a specific alias to avoid confusion with standard Python utils
-sam3d_utils = None
-SAM3D_AVAILABLE = False
-
-if env_ready:
-    try:
-        # Now that sys.path is fixed, this import should work,
-        # and utils.py will successfully find 'tools' and 'sam_3d_body'
-        import utils as sam3d_utils_module
-        sam3d_utils = sam3d_utils_module
-        SAM3D_AVAILABLE = True
-        print("SAM 3D Body utils imported successfully.")
-    except ImportError as e:
-        print(f"Error importing SAM 3D Body utils: {e}")
-        print("This usually happens if 'tools' or 'sam_3d_body' cannot be found by utils.py")
-        import traceback
-        traceback.print_exc()
-
-# ---------------------------------------------------------
-# 3. THEME DEFINITION
-# ---------------------------------------------------------
 colors.steel_blue = colors.Color(
     name="steel_blue",
     c50="#EBF3F8",
@@ -158,222 +75,119 @@ steel_blue_theme = SteelBlueTheme()
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 
-# ---------------------------------------------------------
-# 4. LOAD MODELS
-# ---------------------------------------------------------
-
-# --- 1. Load SAM3 ---
-sam3_model = None
-sam3_processor = None
-if SAM3_AVAILABLE:
-    try:
-        print("Loading SAM3 Model...")
-        sam3_model = Sam3Model.from_pretrained("facebook/sam3").to(device)
-        sam3_processor = Sam3Processor.from_pretrained("facebook/sam3")
-        print("SAM3 loaded.")
-    except Exception as e:
-        print(f"Error loading SAM3: {e}")
-
-# --- 2. Load SAM 3D Body ---
-sam3d_estimator = None
-sam3d_visualizer = None
-
-if SAM3D_AVAILABLE:
-    try:
-        print("Loading SAM 3D Body Estimator (this may take a moment)...")
-        # Initialize the estimator using the utility function from the repo.
-        # Note: detector_name="vitdet" is the default, which requires the 'tools' import to work.
-        sam3d_estimator = sam3d_utils.setup_sam_3d_body(
-            hf_repo_id="facebook/sam-3d-body-dinov3",
-            device=device
-        )
-        sam3d_visualizer = sam3d_utils.setup_visualizer()
-        print("SAM 3D Body loaded successfully.")
-    except Exception as e:
-        print(f"Error loading SAM 3D Body model: {e}")
-        # If loading fails, clear the flag so the UI handles it gracefully
-        SAM3D_AVAILABLE = False
-        import traceback
-        traceback.print_exc()
-
-# ---------------------------------------------------------
-# 5. INFERENCE FUNCTIONS
-# ---------------------------------------------------------
+try:
+    print("Loading SAM3 Model and Processor...")
+    model = Sam3Model.from_pretrained("facebook/sam3").to(device)
+    processor = Sam3Processor.from_pretrained("facebook/sam3")
+    print("Model loaded successfully.")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    print("Ensure you have the correct libraries installed and access to the model.")
+    # Fallback/placeholder for demonstration if the model is not available in this environment yet
+    model = None
+    processor = None
 
 @spaces.GPU
 def segment_image(input_image, text_prompt, threshold=0.5):
-    """Handler for Tab 1: Segmentation"""
     if input_image is None:
         raise gr.Error("Please upload an image.")
     if not text_prompt:
-        raise gr.Error("Please enter a text prompt.")
-    if sam3_model is None:
-        raise gr.Error("SAM3 Model is not loaded.")
+        raise gr.Error("Please enter a text prompt (e.g., 'cat', 'face').")
+
+    if model is None or processor is None:
+        raise gr.Error("Model not loaded correctly.")
 
+    # Convert the image to RGB
     image_pil = input_image.convert("RGB")
-    inputs = sam3_processor(images=image_pil, text=text_prompt, return_tensors="pt").to(device)
 
+    # Preprocess
+    inputs = processor(images=image_pil, text=text_prompt, return_tensors="pt").to(device)
+
+    # Inference
     with torch.no_grad():
-        outputs = sam3_model(**inputs)
+        outputs = model(**inputs)
 
-    results = sam3_processor.post_process_instance_segmentation(
+    # Post-process results
+    results = processor.post_process_instance_segmentation(
         outputs,
         threshold=threshold,
         mask_threshold=0.5,
        target_sizes=inputs.get("original_sizes").tolist()
    )[0]
 
-    masks = results['masks'].cpu().numpy()
-    scores = results['scores'].cpu().numpy()
+    masks = results['masks']    # Boolean tensor [N, H, W]
+    scores = results['scores']
+
+    # Prepare for Gradio AnnotatedImage:
+    # Gradio expects (image, [(mask, label), ...])
 
     annotations = []
-    for i, mask in enumerate(masks):
-        label = f"{text_prompt} ({scores[i]:.2f})"
+    masks_np = masks.cpu().numpy()
+    scores_np = scores.cpu().numpy()
+
+    for i, mask in enumerate(masks_np):
+        # mask is a boolean array (True/False);
+        # AnnotatedImage handles the coloring automatically,
+        # so we just pass the mask and a label.
+        score_val = scores_np[i]
+        label = f"{text_prompt} ({score_val:.2f})"
         annotations.append((mask, label))
 
+    # Return the tuple format for AnnotatedImage
     return (image_pil, annotations)
 
-
-@spaces.GPU
-def process_3d_body(input_image):
-    """Handler for Tab 2: 3D Body Reconstruction"""
-    if input_image is None:
-        raise gr.Error("Please upload an image.")
-
-    if not SAM3D_AVAILABLE or sam3d_estimator is None:
-        raise gr.Error("SAM 3D Body libraries or model failed to load. Check console logs.")
-
-    # Convert PIL to CV2 BGR for the estimator
-    img_np = np.array(input_image.convert("RGB"))
-    img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
-
-    # estimator.process_one_image expects a file path
-    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
-        tmp_path = tmp_file.name
-        cv2.imwrite(tmp_path, img_cv2)
-
-    try:
-        print(f"Processing 3D Body for {tmp_path}...")
-
-        # 1. Run inference.
-        # process_one_image is a method of the estimator class inside sam-3d-body.
-        outputs = sam3d_estimator.process_one_image(tmp_path)
-
-        if not outputs:
-            return None, None, None, "No people detected."
-
-        # 2. 2D keypoints visualization.
-        vis_results_2d = sam3d_utils.visualize_2d_results(img_cv2, outputs, sam3d_visualizer)
-        # vis_results_2d is usually a list of full images with drawings;
-        # ideally we'd grid multiple people, but for the Gradio output we take the first result's image.
-        if vis_results_2d:
-            res_2d_rgb = cv2.cvtColor(vis_results_2d[0], cv2.COLOR_BGR2RGB)
-        else:
-            res_2d_rgb = img_np
-
-        # 3. 3D overlay visualization.
-        # visualize_3d_mesh returns a wide image (Original | Overlay | White | Side).
-        mesh_results_wide = sam3d_utils.visualize_3d_mesh(img_cv2, outputs, sam3d_estimator.faces)
-        if mesh_results_wide:
-            res_3d_overlay_rgb = cv2.cvtColor(mesh_results_wide[0], cv2.COLOR_BGR2RGB)
-        else:
-            res_3d_overlay_rgb = img_np
-
-        # 4. Save a PLY for Model3D, in a unique directory for this run
-        output_dir = tempfile.mkdtemp()
-        image_name = "gradio_mesh"
-
-        # save_mesh_results returns a list of paths to .ply files
-        ply_files = sam3d_utils.save_mesh_results(
-            img_cv2,
-            outputs,
-            sam3d_estimator.faces,
-            output_dir,
-            image_name
-        )
-
-        ply_path = None
-        if ply_files and len(ply_files) > 0:
-            ply_path = ply_files[0]  # Return the first mesh found
-
-        status_msg = f"Detected {len(outputs)} person(s). Displaying Person 0."
-
-        return res_2d_rgb, res_3d_overlay_rgb, ply_path, status_msg
-
-    except Exception as e:
-        import traceback
-        traceback.print_exc()
-        raise gr.Error(f"Inference failed: {str(e)}")
-
-    finally:
-        # Clean up the input temp file
-        if os.path.exists(tmp_path):
-            os.remove(tmp_path)
-
-# ---------------------------------------------------------
-# 6. GUI
-# ---------------------------------------------------------
-
-css = """
+css="""
 #col-container {
     margin: 0 auto;
-    max-width: 1200px;
+    max-width: 980px;
 }
-#main-title h1 {font-size: 2.1em !important; text-align: center;}
-.gradio-container {min-height: 0px !important;}
+#main-title h1 {font-size: 2.1em !important;}
 """
 
 with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown("# **SAM Integrated Vision Suite**", elem_id="main-title")
+        gr.Markdown(
+            "# **SAM3 Image Segmentation**",
+            elem_id="main-title"
+        )
 
-        with gr.Tabs():
-            # ================= TAB 1: SEGMENTATION =================
-            with gr.Tab("SAM3 Segmentation"):
-                gr.Markdown("Segment objects using **SAM3** with text prompts.")
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        t1_input = gr.Image(label="Input Image", type="pil", height=350)
-                        t1_prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., cat, face...")
-                        t1_thresh = gr.Slider(0.0, 1.0, 0.4, step=0.05, label="Threshold")
-                        t1_btn = gr.Button("Segment", variant="primary")
-                    with gr.Column(scale=1.5):
-                        t1_output = gr.AnnotatedImage(label="Segmented Output", height=450)
-
-                t1_btn.click(segment_image, [t1_input, t1_prompt, t1_thresh], [t1_output])
-
-                # Optional examples if files exist
-                # gr.Examples(...)
+        gr.Markdown("Segment objects in images using **SAM3** (Segment Anything Model 3) with text prompts.")
+
+        with gr.Row():
+            with gr.Column(scale=1):
+                input_image = gr.Image(label="Input Image", type="pil", height=300)
+                text_prompt = gr.Textbox(
+                    label="Text Prompt",
+                    placeholder="e.g., cat, ear, car wheel...",
+                )
+
+                run_button = gr.Button("Segment", variant="primary")
 
-            # ================= TAB 2: 3D BODY =================
-            with gr.Tab("SAM 3D Body"):
-                gr.Markdown("Detect human bodies and reconstruct **3D Meshes**.")
+            with gr.Column(scale=1.5):
+                output_image = gr.AnnotatedImage(label="Segmented Output", height=380)
 
         with gr.Row():
-                    with gr.Column(scale=1):
-                        t2_input = gr.Image(label="Input Image", type="pil", height=350)
-                        t2_btn = gr.Button("Generate 3D Body", variant="primary")
-                        t2_status = gr.Textbox(label="Status", interactive=False)
-
-                    with gr.Column(scale=2):
-                        with gr.Row():
-                            t2_vis_2d = gr.Image(label="2D Detection", type="numpy")
-                            t2_vis_overlay = gr.Image(label="3D Visualization (Original | Overlay | White | Side)", type="numpy")
-
-                        t2_model_3d = gr.Model3D(
-                            label="Interactive 3D Mesh",
-                            clear_color=[0.0, 0.0, 0.0, 0.0],
-                            camera_position=[0, 0, 4.0]
-                        )
+            threshold = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, value=0.4, step=0.05)
+
+        gr.Examples(
+            examples=[
+                ["examples/player.jpg", "player in white", 0.5],
+                ["examples/goldencat.webp", "black cat", 0.4],
+                ["examples/taxi.jpg", "blue taxi", 0.5],
+            ],
+            inputs=[input_image, text_prompt, threshold],
+            outputs=[output_image],
+            fn=segment_image,
+            cache_examples="lazy",
+            label="Examples"
+        )
 
-        t2_btn.click(
-            process_3d_body,
-            inputs=[t2_input],
-            outputs=[t2_vis_2d, t2_vis_overlay, t2_model_3d, t2_status]
-        )
+        run_button.click(
+            fn=segment_image,
+            inputs=[input_image, text_prompt, threshold],
+            outputs=[output_image]
+        )
 
 if __name__ == "__main__":
     demo.launch(mcp_server=True, ssr_mode=False, show_error=True)
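For reference, the text-prompted segmentation flow this commit keeps can be exercised outside Gradio. The sketch below reuses the exact Sam3Processor/Sam3Model calls and the facebook/sam3 checkpoint id from the diff; the input filename and the "cat" prompt are placeholders, and a transformers build that ships the Sam3 classes is assumed.

# Minimal standalone sketch of the SAM3 text-prompted segmentation flow
# wired into app.py above. "example.jpg" and "cat" are placeholders.
import torch
from PIL import Image
from transformers import Sam3Processor, Sam3Model

device = "cuda" if torch.cuda.is_available() else "cpu"
model = Sam3Model.from_pretrained("facebook/sam3").to(device)
processor = Sam3Processor.from_pretrained("facebook/sam3")

image = Image.open("example.jpg").convert("RGB")
inputs = processor(images=image, text="cat", return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model(**inputs)

# Same post-processing call as segment_image(): one result dict per input image.
results = processor.post_process_instance_segmentation(
    outputs,
    threshold=0.4,
    mask_threshold=0.5,
    target_sizes=inputs.get("original_sizes").tolist(),
)[0]

for mask, score in zip(results["masks"], results["scores"]):
    print(f"instance: score={score:.2f}, mask pixels={int(mask.sum())}")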
 
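As the new handler's comments note, gr.AnnotatedImage takes a (base_image, [(mask, label), ...]) tuple and colors the masks itself. Below is a tiny self-contained sketch of that contract using a synthetic boolean mask; all values here are illustrative, not from the commit.

# Hypothetical standalone demo of the AnnotatedImage value format that
# segment_image() returns: (base_image, [(mask, label), ...]).
import numpy as np
import gradio as gr
from PIL import Image

base = Image.new("RGB", (128, 128), "white")
mask = np.zeros((128, 128), dtype=bool)
mask[32:96, 32:96] = True  # synthetic square "instance"

with gr.Blocks() as demo:
    gr.AnnotatedImage(value=(base, [(mask, "square (1.00)")]), label="Annotated")

if __name__ == "__main__":
    demo.launch()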
 
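Note that the new import list pulls in random and ImageDraw even though neither is used in the hunks shown above. If a manual overlay were ever wanted instead of AnnotatedImage's automatic coloring, a helper along these lines could use them; the function below is a hedged sketch, not part of the commit.

# Hypothetical helper: blend one randomly colored, semi-transparent layer per
# boolean (H, W) mask over the image, then stamp a count label with ImageDraw.
import random
import numpy as np
from PIL import Image, ImageDraw

def overlay_masks(image_pil, masks, alpha=0.5, seed=0):
    rng = random.Random(seed)
    out = image_pil.convert("RGBA")
    for mask in masks:
        color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255), int(255 * alpha))
        layer = np.zeros((out.height, out.width, 4), dtype=np.uint8)
        layer[np.asarray(mask, dtype=bool)] = color
        out = Image.alpha_composite(out, Image.fromarray(layer))
    draw = ImageDraw.Draw(out)
    draw.text((4, 4), f"{len(masks)} instance(s)", fill=(255, 255, 255, 255))
    return out.convert("RGB")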