John Ho
committed on
Commit
·
579e65b
1
Parent(s):
0db2411
added new variable for reference_frame_idx
Browse files- app.py +14 -2
- samv2_handler.py +5 -1
app.py
CHANGED
@@ -116,11 +116,17 @@ def process_image(
|
|
116 |
)
|
117 |
|
118 |
|
119 |
-
@spaces.GPU(
|
|
|
|
|
120 |
@torch.inference_mode()
|
121 |
@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
|
122 |
def process_video(
|
123 |
-
video_path: str,
|
|
|
|
|
|
|
|
|
124 |
):
|
125 |
"""
|
126 |
SAM2 Video Segmentation
|
@@ -148,6 +154,7 @@ def process_video(
|
|
148 |
do_tidy_up=True,
|
149 |
drop_mask=drop_masks,
|
150 |
async_frame_load=True,
|
|
|
151 |
)
|
152 |
|
153 |
|
@@ -196,6 +203,11 @@ with gr.Blocks() as demo:
|
|
196 |
""",
|
197 |
),
|
198 |
gr.Checkbox(label="remove base64 encoded masks from result JSON"),
|
|
|
|
|
|
|
|
|
|
|
199 |
],
|
200 |
outputs=gr.JSON(label="Output JSON"),
|
201 |
title="SAM2 for Videos",
|
|
|
116 |
)
|
117 |
|
118 |
|
119 |
+
@spaces.GPU(
|
120 |
+
duration=120
|
121 |
+
) # user must have 2-minute of inference time left at the time of calling
|
122 |
@torch.inference_mode()
|
123 |
@torch.autocast(device_type="cuda", dtype=torch.bfloat16)
|
124 |
def process_video(
|
125 |
+
video_path: str,
|
126 |
+
variant: str,
|
127 |
+
masks: Union[list, str],
|
128 |
+
drop_masks: bool = False,
|
129 |
+
ref_frame_idx: int = 0,
|
130 |
):
|
131 |
"""
|
132 |
SAM2 Video Segmentation
|
|
|
154 |
do_tidy_up=True,
|
155 |
drop_mask=drop_masks,
|
156 |
async_frame_load=True,
|
157 |
+
ref_frame_idx=ref_frame_idx,
|
158 |
)
|
159 |
|
160 |
|
|
|
203 |
""",
|
204 |
),
|
205 |
gr.Checkbox(label="remove base64 encoded masks from result JSON"),
|
206 |
+
gr.Number(
|
207 |
+
label="frame index for the provided object masks",
|
208 |
+
value=0,
|
209 |
+
precision=0,
|
210 |
+
),
|
211 |
],
|
212 |
outputs=gr.JSON(label="Output JSON"),
|
213 |
title="SAM2 for Videos",
|
samv2_handler.py
CHANGED
@@ -161,6 +161,7 @@ def run_sam_video_inference(
|
|
161 |
do_tidy_up: bool = False,
|
162 |
drop_mask: bool = True,
|
163 |
async_frame_load: bool = False,
|
|
|
164 |
):
|
165 |
# put video frames into directory
|
166 |
# TODO:
|
@@ -183,7 +184,10 @@ def run_sam_video_inference(
|
|
183 |
)
|
184 |
for i, mask in enumerate(masks):
|
185 |
model.add_new_mask(
|
186 |
-
inference_state=inference_state,
|
|
|
|
|
|
|
187 |
)
|
188 |
masks_generator = model.propagate_in_video(inference_state)
|
189 |
|
|
|
161 |
do_tidy_up: bool = False,
|
162 |
drop_mask: bool = True,
|
163 |
async_frame_load: bool = False,
|
164 |
+
ref_frame_idx: int = 0,
|
165 |
):
|
166 |
# put video frames into directory
|
167 |
# TODO:
|
|
|
184 |
)
|
185 |
for i, mask in enumerate(masks):
|
186 |
model.add_new_mask(
|
187 |
+
inference_state=inference_state,
|
188 |
+
frame_idx=ref_frame_idx,
|
189 |
+
obj_id=i,
|
190 |
+
mask=mask,
|
191 |
)
|
192 |
masks_generator = model.propagate_in_video(inference_state)
|
193 |
|