JuanLozada97 committed on
Commit
c6ccb48
1 Parent(s): d1bc201

first commit

Files changed (6)
  1. app.py +144 -0
  2. examples/img_demo.png +0 -0
  3. examples/truck.jpg +0 -0
  4. medsam_vit_b.pth +3 -0
  5. model.py +7 -0
  6. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,144 @@
+ import gradio as gr
+ import os
+ import torch
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ from typing import Tuple
+ from timeit import default_timer as timer
+ from skimage import transform
+
+ import torch.nn.functional as F
+
+ from model import create_sam_model
+
+ # 1. Setup variables
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ checkpoint = "medsam_vit_b.pth"
+ model_type = "vit_b"
+
+ # 2. Prepare the model and load the saved weights
+ medsam_model = create_sam_model(model_type, checkpoint, device)
+
+ # 3. Plotting helpers and predict fn
+ def show_mask(mask, ax):
+     """Overlay a binary segmentation mask on a matplotlib axis."""
+     color = np.array([251 / 255, 252 / 255, 30 / 255, 0.6])
+     h, w = mask.shape[-2:]
+     mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+     ax.imshow(mask_image)
+
+ def show_box(box, ax):
+     """Draw a bounding box given as (x0, y0, x1, y1) on a matplotlib axis."""
+     x0, y0 = box[0], box[1]
+     w, h = box[2] - box[0], box[3] - box[1]
+     ax.add_patch(
+         plt.Rectangle((x0, y0), w, h, edgecolor="blue", facecolor=(0, 0, 0, 0), lw=2)
+     )
+
+ @torch.no_grad()
+ def medsam_inference(medsam_model, img_embed, box_1024, H, W):
+     box_torch = torch.as_tensor(box_1024, dtype=torch.float, device=img_embed.device)
+     if len(box_torch.shape) == 2:
+         box_torch = box_torch[:, None, :]  # (B, 1, 4)
+
+     sparse_embeddings, dense_embeddings = medsam_model.prompt_encoder(
+         points=None,
+         boxes=box_torch,
+         masks=None,
+     )
+     low_res_logits, _ = medsam_model.mask_decoder(
+         image_embeddings=img_embed,  # (B, 256, 64, 64)
+         image_pe=medsam_model.prompt_encoder.get_dense_pe(),  # (1, 256, 64, 64)
+         sparse_prompt_embeddings=sparse_embeddings,  # (B, 2, 256)
+         dense_prompt_embeddings=dense_embeddings,  # (B, 256, 64, 64)
+         multimask_output=False,
+     )
+
+     low_res_pred = torch.sigmoid(low_res_logits)  # (1, 1, 256, 256)
+
+     low_res_pred = F.interpolate(
+         low_res_pred,
+         size=(H, W),
+         mode="bilinear",
+         align_corners=False,
+     )  # (1, 1, H, W)
+     low_res_pred = low_res_pred.squeeze().cpu().numpy()  # (H, W)
+     medsam_seg = (low_res_pred > 0.5).astype(np.uint8)
+     return medsam_seg
+
+ def predict(img) -> Tuple[plt.Figure, float]:
+     """Transforms and performs a prediction on img; returns the figure and time taken."""
+     # Start the timer
+     start_time = timer()
+
+     # gr.Image(type="pil") already yields RGB, so no BGR-to-RGB conversion is needed
+     img_np = np.array(img)
+     if len(img_np.shape) == 2:  # grayscale -> replicate to 3 channels
+         img_3c = np.repeat(img_np[:, :, None], 3, axis=-1)
+     else:
+         img_3c = img_np
+     H, W, _ = img_3c.shape
+     # Image preprocessing
+     img_1024 = transform.resize(
+         img_3c, (1024, 1024), order=3, preserve_range=True, anti_aliasing=True
+     ).astype(np.uint8)
+     img_1024 = (img_1024 - img_1024.min()) / np.clip(
+         img_1024.max() - img_1024.min(), a_min=1e-8, a_max=None
+     )  # normalize to [0, 1], (H, W, 3)
+     # Convert the shape to (1, 3, H, W)
+     img_1024_tensor = (
+         torch.tensor(img_1024).float().permute(2, 0, 1).unsqueeze(0).to(device)
+     )
+
+     # Put the model into evaluation mode and turn on inference mode
+     medsam_model.eval()
+     with torch.inference_mode():
+         image_embedding = medsam_model.image_encoder(img_1024_tensor)  # (1, 256, 64, 64)
+     # Define the input box
+     input_box = np.array([[425, 600, 700, 875]])
+     # Transfer the box to the 1024x1024 scale
+     box_1024 = input_box / np.array([W, H, W, H]) * 1024
+
+     medsam_seg = medsam_inference(medsam_model, image_embedding, box_1024, H, W)
+     # Calculate the prediction time
+     pred_time = round(timer() - start_time, 5)
+
+     fig, ax = plt.subplots(1, 2, figsize=(10, 5))
+     ax[0].imshow(img_3c)
+     show_box(input_box[0], ax[0])
+     ax[0].set_title("Input Image and Bounding Box")
+     ax[1].imshow(img_3c)
+     show_mask(medsam_seg, ax[1])
+     show_box(input_box[0], ax[1])
+     ax[1].set_title("MedSAM Segmentation")
+
+     # Return the prediction figure and the prediction time
+     return fig, pred_time
+
+ # 4. Gradio app
+ # Create title, description and article strings
+ title = "MedSAM"
+ description = "A SAM model fine-tuned for the segmentation of medical images. The app encodes the uploaded image with the image encoder and decodes a segmentation mask for a bounding-box prompt with the mask decoder."
+ article = "Created in gradio-sam-predictor-image-embedding-generator.ipynb."
+
+ # Create the examples list from the "examples/" directory
+ example_list = [["examples/" + example] for example in os.listdir("examples")]
+
+ # Create the Gradio demo
+ demo = gr.Interface(
+     fn=predict,  # mapping function from input to output
+     inputs=gr.Image(type="pil"),  # what are the inputs?
+     outputs=[
+         gr.Plot(label="Predictions"),  # what are the outputs?
+         gr.Number(label="Prediction time (s)"),  # predict returns two values, so two outputs
+     ],
+     examples=example_list,
+     title=title,
+     description=description,
+     article=article,
+ )
+
+ # Launch the demo!
+ demo.launch(debug=False,  # print errors locally?
+             share=True)  # generate a publicly shareable URL?
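
Note that predict hard-codes the prompt box at [[425, 600, 700, 875]], so every upload is segmented with the same rectangle. For reference, a minimal sketch (not part of this commit) of driving the same inference path outside Gradio with a caller-chosen box; the image path and box coordinates below are illustrative assumptions:

```python
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from skimage import transform

from model import create_sam_model

device = "cuda" if torch.cuda.is_available() else "cpu"
model = create_sam_model("vit_b", "medsam_vit_b.pth", device)
model.eval()

img_3c = np.array(Image.open("examples/img_demo.png").convert("RGB"))  # illustrative path
H, W, _ = img_3c.shape

# Same preprocessing as predict(): bicubic resize to 1024x1024, min-max normalize
img_1024 = transform.resize(
    img_3c, (1024, 1024), order=3, preserve_range=True, anti_aliasing=True
).astype(np.uint8)
img_1024 = (img_1024 - img_1024.min()) / np.clip(
    img_1024.max() - img_1024.min(), a_min=1e-8, a_max=None
)
tensor = torch.tensor(img_1024).float().permute(2, 0, 1).unsqueeze(0).to(device)

box = np.array([[50, 50, 200, 200]])  # illustrative (x0, y0, x1, y1) in original pixels
box_1024 = box / np.array([W, H, W, H]) * 1024

with torch.inference_mode():
    embedding = model.image_encoder(tensor)  # (1, 256, 64, 64)
    box_t = torch.as_tensor(box_1024, dtype=torch.float, device=device)[:, None, :]
    sparse, dense = model.prompt_encoder(points=None, boxes=box_t, masks=None)
    logits, _ = model.mask_decoder(
        image_embeddings=embedding,
        image_pe=model.prompt_encoder.get_dense_pe(),
        sparse_prompt_embeddings=sparse,
        dense_prompt_embeddings=dense,
        multimask_output=False,
    )
    probs = F.interpolate(
        torch.sigmoid(logits), size=(H, W), mode="bilinear", align_corners=False
    )

mask = (probs.squeeze().cpu().numpy() > 0.5).astype(np.uint8)
print("mask pixels:", int(mask.sum()))
```

Importing from model rather than app avoids triggering demo.launch(), which runs at module level in app.py.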
examples/img_demo.png ADDED
examples/truck.jpg ADDED
medsam_vit_b.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9ef4acfee5f5a5d9737a32b5ce03f2cfc5d349289c6a2f8270d7d3f63eaf966
+ size 189792256
model.py ADDED
@@ -0,0 +1,7 @@
+ from segment_anything import sam_model_registry
+
+ def create_sam_model(model_type, checkpoint, device: str = "cpu"):
+     """Instantiate a SAM model of the given type and move the loaded weights to device."""
+     medsam_model = sam_model_registry[model_type](checkpoint=checkpoint)
+     medsam_model = medsam_model.to(device)
+     return medsam_model
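
For reference, a short usage sketch of create_sam_model (it assumes the checkpoint from this commit sits in the working directory):

```python
import torch

from model import create_sam_model

device = "cuda" if torch.cuda.is_available() else "cpu"
medsam = create_sam_model("vit_b", "medsam_vit_b.pth", device)
medsam.eval()  # inference only; app.py also calls eval() inside predict
print(f"{sum(p.numel() for p in medsam.parameters()):,} parameters")
```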
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ torch==2.1.0
+ torchvision==0.16.0
+ gradio==3.50.2
+ numpy
+ matplotlib
+ scikit-image
+ git+https://github.com/facebookresearch/segment-anything.git