wondervictor committed
Update app.py
app.py CHANGED
@@ -27,7 +27,7 @@ import numpy as np
 import sys
 import tqdm
 
-version = "YxZhang/evf-sam2"
+version = "YxZhang/evf-sam2-multitask"
 model_type = "sam2"
 
 tokenizer = AutoTokenizer.from_pretrained(
@@ -58,7 +58,7 @@ video_model.to('cuda')
 
 @spaces.GPU
 @torch.no_grad()
-def inference_image(image_np, prompt):
+def inference_image(image_np, prompt, semantic_type):
     original_size_list = [image_np.shape[:2]]
 
     image_beit = beit3_preprocess(image_np, 224).to(dtype=image_model.dtype,
@@ -68,6 +68,8 @@ def inference_image(image_np, prompt):
     image_sam = image_sam.to(dtype=image_model.dtype,
                              device=image_model.device)
 
+    if semantic_type:
+        prompt = "[semantic] " + prompt
     input_ids = tokenizer(
         prompt, return_tensors="pt")["input_ids"].to(device=image_model.device)
 
@@ -93,7 +95,7 @@ def inference_image(image_np, prompt):
 @spaces.GPU
 @torch.no_grad()
 @torch.autocast(device_type="cuda", dtype=torch.float16)
-def inference_video(video_path, prompt):
+def inference_video(video_path, prompt, semantic_type):
 
     os.system("rm -rf demo_temp")
     os.makedirs("demo_temp/input_frames", exist_ok=True)
@@ -109,6 +111,8 @@ def inference_video(video_path, prompt):
     image_beit = beit3_preprocess(image_np, 224).to(dtype=video_model.dtype,
                                                     device=video_model.device)
 
+    if semantic_type:
+        prompt = "[semantic] " + prompt
     input_ids = tokenizer(
         prompt, return_tensors="pt")["input_ids"].to(device=video_model.device)
 
@@ -162,6 +166,12 @@ with gr.Blocks() as demo:
                 submit_image = gr.Button(value='Submit',
                                          scale=1,
                                          variant='primary')
+            with gr.Row():
+                semantic_type_img = gr.Checkbox(
+                    False,
+                    label="semantic level",
+                    info="check this if you want to segment body parts or background or multi objects (only available with latest evf-sam checkpoint)"
+                )
     with gr.Tab(label="EVF-SAM-2-Video"):
         with gr.Row():
             input_video = gr.Video(label='Input Video')
@@ -175,11 +185,17 @@ with gr.Blocks() as demo:
                 submit_video = gr.Button(value='Submit',
                                          scale=1,
                                          variant='primary')
+            with gr.Row():
+                semantic_type_vid = gr.Checkbox(
+                    False,
+                    label="semantic level",
+                    info="check this if you want to segment body parts or background or multi objects (only available with latest evf-sam checkpoint)"
+                )
 
     submit_image.click(fn=inference_image,
-                       inputs=[input_image, image_prompt],
+                       inputs=[input_image, image_prompt, semantic_type_img],
                        outputs=output_image)
     submit_video.click(fn=inference_video,
-                       inputs=[input_video, video_prompt],
+                       inputs=[input_video, video_prompt, semantic_type_vid],
                        outputs=output_video)
demo.launch(show_error=True)
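
In short, the commit points the demo at the YxZhang/evf-sam2-multitask checkpoint and adds a "semantic level" checkbox to both the image and video tabs; when the box is checked, each handler prepends the "[semantic] " token to the text prompt before tokenization. The minimal sketch below reproduces that checkbox-to-prompt wiring in isolation. It is an illustrative sketch, not the actual app: run_model is a hypothetical placeholder for the EVF-SAM forward pass.

import gradio as gr

def run_model(image_np, prompt):
    # Hypothetical stand-in for the EVF-SAM forward pass: echoes the
    # input image so the sketch runs without model weights.
    print(f"prompt sent to model: {prompt!r}")
    return image_np

def inference_image(image_np, prompt, semantic_type):
    # Same pattern as the commit: the checkbox state arrives as a bool,
    # and a truthy value prepends the "[semantic] " token that switches
    # the multitask checkpoint to semantic-level segmentation.
    if semantic_type:
        prompt = "[semantic] " + prompt
    return run_model(image_np, prompt)

with gr.Blocks() as demo:
    input_image = gr.Image(label="Input Image")
    image_prompt = gr.Textbox(label="Prompt")
    # The first positional argument of gr.Checkbox is its initial value,
    # as in the diff above.
    semantic_type = gr.Checkbox(False, label="semantic level")
    output_image = gr.Image(label="Output")
    submit = gr.Button("Submit")
    # Listing the checkbox as a third input makes Gradio forward its
    # bool state to the handler on every click.
    submit.click(fn=inference_image,
                 inputs=[input_image, image_prompt, semantic_type],
                 outputs=output_image)

if __name__ == "__main__":
    demo.launch()

Passing the checkbox through inputs keeps the handler a pure function of its arguments, the standard Gradio pattern, which is why the commit extends both .click(...) calls rather than reading the checkbox state globally.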