Spaces:
Running
on
L4
Running
on
L4
Update hugging_face/app.py
#4
by
assile
- opened
- hugging_face/app.py +33 -37
hugging_face/app.py
CHANGED
@@ -25,6 +25,9 @@ from matanyone_wrapper import matanyone
|
|
25 |
from matanyone.utils.get_default_model import get_matanyone_model
|
26 |
from matanyone.inference.inference_core import InferenceCore
|
27 |
|
|
|
|
|
|
|
28 |
def parse_augment():
|
29 |
parser = argparse.ArgumentParser()
|
30 |
parser.add_argument('--device', type=str, default=None)
|
@@ -121,7 +124,6 @@ def get_frames_from_video(video_input, video_state):
|
|
121 |
except Exception as e:
|
122 |
print(f"Audio extraction error: {str(e)}")
|
123 |
audio_path = "" # Set to "" if extraction fails
|
124 |
-
# print(f'audio_path: {audio_path}')
|
125 |
|
126 |
# extract frames
|
127 |
try:
|
@@ -140,15 +142,15 @@ def get_frames_from_video(video_input, video_state):
|
|
140 |
print("read_frame_source:{} error. {}\n".format(video_path, str(e)))
|
141 |
image_size = (frames[0].shape[0],frames[0].shape[1])
|
142 |
|
143 |
-
# resize if resolution too big
|
144 |
-
if image_size[0]>=1280 and image_size[0]>=1280:
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
|
153 |
# initialize video_state
|
154 |
video_state = {
|
@@ -165,8 +167,7 @@ def get_frames_from_video(video_input, video_state):
|
|
165 |
video_info = "Video Name: {},\nFPS: {},\nTotal Frames: {},\nImage Size:{}".format(video_state["video_name"], round(video_state["fps"], 0), len(frames), image_size)
|
166 |
model.samcontroler.sam_controler.reset_image()
|
167 |
model.samcontroler.sam_controler.set_image(video_state["origin_images"][0])
|
168 |
-
return video_state, video_info, video_state["origin_images"][0], \
|
169 |
-
gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
|
170 |
gr.update(visible=True), gr.update(visible=True), \
|
171 |
gr.update(visible=True), gr.update(visible=True),\
|
172 |
gr.update(visible=True), gr.update(visible=True), \
|
@@ -292,6 +293,7 @@ def image_matting(video_state, interactive_state, mask_dropdown, erode_kernel_si
|
|
292 |
foreground, alpha = matanyone(matanyone_processor, following_frames, template_mask*255, r_erode=erode_kernel_size, r_dilate=dilate_kernel_size, n_warmup=refine_iter)
|
293 |
foreground_output = Image.fromarray(foreground[-1])
|
294 |
alpha_output = Image.fromarray(alpha[-1][:,:,0])
|
|
|
295 |
return foreground_output, alpha_output
|
296 |
|
297 |
# video matting
|
@@ -324,7 +326,7 @@ def video_matting(video_state, interactive_state, mask_dropdown, erode_kernel_si
|
|
324 |
|
325 |
foreground_output = generate_video_from_frames(foreground, output_path="./results/{}_fg.mp4".format(video_state["video_name"]), fps=fps, audio_path=audio_path) # import video_input to name the output video
|
326 |
alpha_output = generate_video_from_frames(alpha, output_path="./results/{}_alpha.mp4".format(video_state["video_name"]), fps=fps, gray2rgb=True, audio_path=audio_path) # import video_input to name the output video
|
327 |
-
|
328 |
return foreground_output, alpha_output
|
329 |
|
330 |
|
@@ -409,38 +411,32 @@ sam_checkpoint_url_dict = {
|
|
409 |
'vit_l': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth",
|
410 |
'vit_b': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
|
411 |
}
|
412 |
-
checkpoint_folder = os.path.join('
|
413 |
|
414 |
sam_checkpoint = load_file_from_url(sam_checkpoint_url_dict[args.sam_model_type], checkpoint_folder)
|
415 |
# initialize sams
|
416 |
model = MaskGenerator(sam_checkpoint, args)
|
417 |
|
418 |
# initialize matanyone
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
# matanyone_model = get_matanyone_model(ckpt_path, args.device)
|
423 |
-
# load from Hugging Face
|
424 |
-
from matanyone.model.matanyone import MatAnyone
|
425 |
-
matanyone_model = MatAnyone.from_pretrained("PeiqingYang/MatAnyone")
|
426 |
-
|
427 |
matanyone_model = matanyone_model.to(args.device).eval()
|
428 |
-
matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
|
429 |
|
430 |
# download test samples
|
431 |
-
|
432 |
-
|
433 |
-
load_file_from_url(
|
434 |
-
load_file_from_url(
|
435 |
-
load_file_from_url(
|
436 |
-
load_file_from_url(
|
437 |
-
load_file_from_url(
|
438 |
-
load_file_from_url(os.path.join(media_url, 'test-sample1.jpg'), test_sample_path)
|
439 |
|
440 |
# download assets
|
441 |
-
assets_path = os.path.join('
|
442 |
-
load_file_from_url(
|
443 |
-
load_file_from_url(
|
444 |
|
445 |
# documents
|
446 |
title = r"""<div class="multi-layer" align="center"><span>MatAnyone</span></div>
|
@@ -574,11 +570,11 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=my_custom_css) as demo:
|
|
574 |
with gr.Row():
|
575 |
with gr.Column():
|
576 |
gr.Markdown("### Case 1: Single Target")
|
577 |
-
gr.Video(value="
|
578 |
|
579 |
with gr.Column():
|
580 |
gr.Markdown("### Case 2: Multiple Targets")
|
581 |
-
gr.Video(value="
|
582 |
|
583 |
with gr.Tabs():
|
584 |
with gr.TabItem("Video"):
|
@@ -978,4 +974,4 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=my_custom_css) as demo:
|
|
978 |
gr.Markdown(article)
|
979 |
|
980 |
demo.queue()
|
981 |
-
demo.launch(debug=True)
|
|
|
25 |
from matanyone.utils.get_default_model import get_matanyone_model
|
26 |
from matanyone.inference.inference_core import InferenceCore
|
27 |
|
28 |
+
import warnings
|
29 |
+
warnings.filterwarnings("ignore")
|
30 |
+
|
31 |
def parse_augment():
|
32 |
parser = argparse.ArgumentParser()
|
33 |
parser.add_argument('--device', type=str, default=None)
|
|
|
124 |
except Exception as e:
|
125 |
print(f"Audio extraction error: {str(e)}")
|
126 |
audio_path = "" # Set to "" if extraction fails
|
|
|
127 |
|
128 |
# extract frames
|
129 |
try:
|
|
|
142 |
print("read_frame_source:{} error. {}\n".format(video_path, str(e)))
|
143 |
image_size = (frames[0].shape[0],frames[0].shape[1])
|
144 |
|
145 |
+
# [remove for local demo] resize if resolution too big
|
146 |
+
# if image_size[0]>=1280 and image_size[0]>=1280:
|
147 |
+
# scale = 1080 / min(image_size)
|
148 |
+
# new_w = int(image_size[1] * scale)
|
149 |
+
# new_h = int(image_size[0] * scale)
|
150 |
+
# # update frames
|
151 |
+
# frames = [cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA) for f in frames]
|
152 |
+
# # update image_size
|
153 |
+
# image_size = (frames[0].shape[0],frames[0].shape[1])
|
154 |
|
155 |
# initialize video_state
|
156 |
video_state = {
|
|
|
167 |
video_info = "Video Name: {},\nFPS: {},\nTotal Frames: {},\nImage Size:{}".format(video_state["video_name"], round(video_state["fps"], 0), len(frames), image_size)
|
168 |
model.samcontroler.sam_controler.reset_image()
|
169 |
model.samcontroler.sam_controler.set_image(video_state["origin_images"][0])
|
170 |
+
return video_state, video_info, video_state["origin_images"][0], gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
|
|
|
171 |
gr.update(visible=True), gr.update(visible=True), \
|
172 |
gr.update(visible=True), gr.update(visible=True),\
|
173 |
gr.update(visible=True), gr.update(visible=True), \
|
|
|
293 |
foreground, alpha = matanyone(matanyone_processor, following_frames, template_mask*255, r_erode=erode_kernel_size, r_dilate=dilate_kernel_size, n_warmup=refine_iter)
|
294 |
foreground_output = Image.fromarray(foreground[-1])
|
295 |
alpha_output = Image.fromarray(alpha[-1][:,:,0])
|
296 |
+
|
297 |
return foreground_output, alpha_output
|
298 |
|
299 |
# video matting
|
|
|
326 |
|
327 |
foreground_output = generate_video_from_frames(foreground, output_path="./results/{}_fg.mp4".format(video_state["video_name"]), fps=fps, audio_path=audio_path) # import video_input to name the output video
|
328 |
alpha_output = generate_video_from_frames(alpha, output_path="./results/{}_alpha.mp4".format(video_state["video_name"]), fps=fps, gray2rgb=True, audio_path=audio_path) # import video_input to name the output video
|
329 |
+
|
330 |
return foreground_output, alpha_output
|
331 |
|
332 |
|
|
|
411 |
'vit_l': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth",
|
412 |
'vit_b': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
|
413 |
}
|
414 |
+
checkpoint_folder = os.path.join('..', 'pretrained_models')
|
415 |
|
416 |
sam_checkpoint = load_file_from_url(sam_checkpoint_url_dict[args.sam_model_type], checkpoint_folder)
|
417 |
# initialize sams
|
418 |
model = MaskGenerator(sam_checkpoint, args)
|
419 |
|
420 |
# initialize matanyone
|
421 |
+
pretrain_model_url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
|
422 |
+
ckpt_path = load_file_from_url(pretrain_model_url, checkpoint_folder)
|
423 |
+
matanyone_model = get_matanyone_model(ckpt_path, args.device)
|
|
|
|
|
|
|
|
|
|
|
424 |
matanyone_model = matanyone_model.to(args.device).eval()
|
425 |
+
# matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
|
426 |
|
427 |
# download test samples
|
428 |
+
test_sample_path = os.path.join('.', "test_sample/")
|
429 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample0-720p.mp4', test_sample_path)
|
430 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample1-720p.mp4', test_sample_path)
|
431 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample2-720p.mp4', test_sample_path)
|
432 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample3-720p.mp4', test_sample_path)
|
433 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample0.jpg', test_sample_path)
|
434 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample1.jpg', test_sample_path)
|
|
|
435 |
|
436 |
# download assets
|
437 |
+
assets_path = os.path.join('.', "assets/")
|
438 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/tutorial_single_target.mp4', assets_path)
|
439 |
+
load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/tutorial_multi_targets.mp4', assets_path)
|
440 |
|
441 |
# documents
|
442 |
title = r"""<div class="multi-layer" align="center"><span>MatAnyone</span></div>
|
|
|
570 |
with gr.Row():
|
571 |
with gr.Column():
|
572 |
gr.Markdown("### Case 1: Single Target")
|
573 |
+
gr.Video(value="./assets/tutorial_single_target.mp4", elem_classes="video")
|
574 |
|
575 |
with gr.Column():
|
576 |
gr.Markdown("### Case 2: Multiple Targets")
|
577 |
+
gr.Video(value="./assets/tutorial_multi_targets.mp4", elem_classes="video")
|
578 |
|
579 |
with gr.Tabs():
|
580 |
with gr.TabItem("Video"):
|
|
|
974 |
gr.Markdown(article)
|
975 |
|
976 |
demo.queue()
|
977 |
+
demo.launch(share=True, debug=True)
|