Spaces:

JuanLozada97
/

sam-predictor-image-embedding-generator

Sleeping

App Files Files Community

JuanLozada97 committed on Nov 22, 2023

Commit

28833ba

•

1 Parent(s): 70ee8a6

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -10

app.py CHANGED Viewed

@@ -72,7 +72,7 @@ def medsam_inference(medsam_model, img_embed, box_1024, H, W):
     medsam_seg = (low_res_pred > 0.5).astype(np.uint8)
     return medsam_seg
-def predict(img,x1,y1,x2,y2) -> Tuple[Dict, float]:
     """Transforms and performs a prediction on img and returns prediction and time taken.
     """
     # Start the timer
@@ -106,11 +106,9 @@ def predict(img,x1,y1,x2,y2) -> Tuple[Dict, float]:
     with torch.inference_mode():
       image_embedding = medsam_model.image_encoder(img_1024_tensor)  # (1, 256, 64, 64)
     # define the inputbox
-    input_box = np.array([[x1,y1,x2,y2]])
-    input_box = np.nan_to_num(input_box, nan=0)
     # transfer box_np to 1024x1024 scale
-    scaling_factor = 1/np.array([W, H, W, H])
-    box_1024 = input_box.astype(int) * scaling_factor * 1024
     medsam_seg = medsam_inference(medsam_model, image_embedding, box_1024, H, W)
     pred_time = round(timer() - start_time, 5)
@@ -143,11 +141,7 @@ example_list = [["examples/" + example] for example in os.listdir("examples")]
 # Create the Gradio demo
 demo = gr.Interface(fn=predict, # mapping function from input to output
-                    inputs=[gr.Image(type="pil"),
-                            gr.Slider(0, 512, randomize=True,step=1, label="X1",info="top-left point"),
-                            gr.Slider(0, 512, randomize=True,step=1, label="Y1"),
-                            gr.Slider(0, 512, randomize=True,step=1, label="X2",info="bottom-right point"),
-                            gr.Slider(0, 512, randomize=True,step=1, label="Y2"),], # what are the inputs?
                     outputs=[gr.Plot(label="Predictions"), # what are the outputs?
                             gr.Number(label="Prediction time (s)"),
                             gr.JSON(label="Embedding Image")], # three output components are listed, so fn must return three values

     medsam_seg = (low_res_pred > 0.5).astype(np.uint8)
     return medsam_seg
+def predict(img) -> Tuple[Dict, float]:
     """Transforms and performs a prediction on img and returns prediction and time taken.
     """
     # Start the timer
     with torch.inference_mode():
       image_embedding = medsam_model.image_encoder(img_1024_tensor)  # (1, 256, 64, 64)
     # define the inputbox
+    input_box = np.array([[125, 275, 190, 350]])
     # transfer box_np to 1024x1024 scale
+    box_1024 = input_box.astype(int) / np.array([W, H, W, H])* 1024
     medsam_seg = medsam_inference(medsam_model, image_embedding, box_1024, H, W)
     pred_time = round(timer() - start_time, 5)
 # Create the Gradio demo
 demo = gr.Interface(fn=predict, # mapping function from input to output
+                    inputs=gr.Image(type="pil"), # what are the inputs?
                     outputs=[gr.Plot(label="Predictions"), # what are the outputs?
                             gr.Number(label="Prediction time (s)"),
                             gr.JSON(label="Embedding Image")], # three output components are listed, so fn must return three values