Spaces: Running on Zero

Commit f55d166 · Parent(s): 6c1c801

Added compatibility to ZeroGPU

Browse files:
- app.py: +12 -7
- requirements.txt: +2 -1
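Taken together, the changes follow the standard ZeroGPU recipe: import `spaces`, keep model weights on CPU at startup, decorate the GPU-bound function with `@spaces.GPU` so a device is attached only for the duration of each call, move tensors to CUDA inside that function, and release cached memory afterwards. A minimal sketch of that pattern (editor's sketch with a placeholder checkpoint name, not the app's actual code):

```python
import spaces  # the Hugging Face `spaces` package required on ZeroGPU Spaces
import torch
from transformers import AutoModel

# Load on CPU at import time; no GPU is attached yet on a ZeroGPU Space.
model = AutoModel.from_pretrained("some-org/some-model", trust_remote_code=True).to("cpu").eval()

@spaces.GPU(duration=120)  # a GPU is attached only while this function runs
def infer(x: torch.Tensor):
    model.to("cuda")                # move weights onto the just-attached GPU
    with torch.no_grad():
        out = model(x.to("cuda"))   # output type depends on the checkpoint
    torch.cuda.empty_cache()        # free cached blocks before the GPU detaches
    return out
```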
app.py CHANGED
@@ -9,6 +9,8 @@ from PIL import Image
 from io import BytesIO
 import base64
 from pathlib import Path
+import spaces  # 👈 REQUIRED for ZeroGPU
+
 
 # --- Setup ---
 os.environ["GRADIO_TEMP_DIR"] = "tmp"
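The new `import spaces` line ties the app to the Spaces runtime. To keep the same file runnable on a machine without the `spaces` package, a common fallback (an editor's sketch, not part of this commit; it covers only the called form `@spaces.GPU(...)` used here) stubs the decorator:

```python
try:
    import spaces
except ImportError:
    class spaces:  # minimal local stand-in for the ZeroGPU decorator
        @staticmethod
        def GPU(*_args, **_kwargs):
            def wrap(fn):
                return fn  # run the function as-is, with no GPU allocation
            return wrap
```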
@@ -16,11 +18,12 @@ os.makedirs(os.environ["GRADIO_TEMP_DIR"], exist_ok=True)
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# --- Load Models ---
-model_B = AutoModel.from_pretrained("lorebianchi98/Talk2DINO-ViTB", trust_remote_code=True).to(device)
-model_L = AutoModel.from_pretrained("lorebianchi98/Talk2DINO-ViTL", trust_remote_code=True).to(device)
+# --- Load Models (on CPU first; ZeroGPU will move to CUDA dynamically) ---
+model_B = AutoModel.from_pretrained("lorebianchi98/Talk2DINO-ViTB", trust_remote_code=True).to("cpu").eval()
+model_L = AutoModel.from_pretrained("lorebianchi98/Talk2DINO-ViTL", trust_remote_code=True).to("cpu").eval()
 MODELS = {"ViT-B": model_B, "ViT-L": model_L}
 
+
 # --- Example Setup ---
 EXAMPLE_IMAGES_DIR = Path("examples").resolve()
 example_images = sorted([str(p) for p in EXAMPLE_IMAGES_DIR.glob("*.png")])
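Both checkpoints are now built eagerly at startup and stay resident in host RAM. If memory is tight, a lazy variant (editor's sketch; same repository names as the diff, hypothetical `get_model` helper) defers each build to first use:

```python
from functools import lru_cache
from transformers import AutoModel

_REPOS = {
    "ViT-B": "lorebianchi98/Talk2DINO-ViTB",
    "ViT-L": "lorebianchi98/Talk2DINO-ViTL",
}

@lru_cache(maxsize=None)
def get_model(name: str):
    # Downloaded and built only on the first request for this variant.
    return AutoModel.from_pretrained(_REPOS[name], trust_remote_code=True).to("cpu").eval()
```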
@@ -36,17 +39,18 @@ DEFAULT_BG_CLEAN = False
 
 
 # --- Inference Function ---
+@spaces.GPU(duration=120)  # 👈 Allocates GPU dynamically for this call
 def talk2dino_infer(input_image, class_text, selected_model="ViT-B",
                     apply_pamr=True, with_background=False, bg_thresh=0.55, apply_bg_clean=False):
     if input_image is None:
         raise gr.Error("No image detected. Please select or upload an image first.")
 
-    model = MODELS[selected_model]
+    model = MODELS[selected_model].to("cuda")  # 👈 Move to GPU here
     text = [t.strip() for t in class_text.replace("_", " ").split(",") if t.strip()]
     if len(text) == 0:
         raise gr.Error("Please provide at least one class name before generating segmentation.")
 
-    img = F.to_tensor(input_image).unsqueeze(0).float().to(device) * 255.0
+    img = F.to_tensor(input_image).unsqueeze(0).float().to("cuda") * 255.0
 
     # Generate color palette
     palette = [
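Hard-coding `"cuda"` is safe under `@spaces.GPU`, where a device is guaranteed during the call, but it breaks plain CPU runs of the same file. A variant that derives the device at call time (editor's sketch, hypothetical `infer`):

```python
import torch
import spaces

@spaces.GPU(duration=120)  # GPU attached only while this call runs
def infer(x: torch.Tensor) -> torch.Tensor:
    # On ZeroGPU, CUDA is available inside the decorated call; locally it may not be.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return (x.to(device) * 255.0).cpu()
```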
@@ -84,6 +88,8 @@ def talk2dino_infer(input_image, class_text, selected_model="ViT-B",
         palette,
         texts=text
     )
+
+    torch.cuda.empty_cache()  # 👈 Important for ZeroGPU memory cleanup
     return img_out
 
 
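As written, `torch.cuda.empty_cache()` runs only on the success path. A defensive variant (editor's sketch, hypothetical `infer_safely`) frees the cache even if inference raises:

```python
import torch
import spaces

@spaces.GPU(duration=120)
def infer_safely(x: torch.Tensor) -> torch.Tensor:
    try:
        return (x.to("cuda") * 255.0).cpu()
    finally:
        # Runs on success and on error, so cached blocks are released
        # before the ZeroGPU allocation is returned.
        torch.cuda.empty_cache()
```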
@@ -100,7 +106,6 @@ with gr.Blocks(title="Talk2DINO Demo") as demo:
     gr.Markdown(f"""
     # 🦖 Talk2DINO Demo
 
-
     
 
     <div style="font-size: x-large; white-space: nowrap; display: flex; align-items: center; gap: 10px;">
@@ -234,4 +239,4 @@ with gr.Blocks(title="Talk2DINO Demo") as demo:
         outputs=output_image
     )
 
-demo.launch(
+demo.launch()
requirements.txt CHANGED

@@ -17,4 +17,5 @@ scikit-learn
 safetensors==0.4.3
 gradio
 torch
-torchvision
+torchvision
+spaces
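A quick smoke test for the updated dependency list (editor's sketch):

```python
# Confirm the ZeroGPU-related dependencies import cleanly in the Space.
import importlib

for pkg in ("torch", "torchvision", "gradio", "spaces"):
    importlib.import_module(pkg)
    print(pkg, "OK")
```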