Update hugging_face/app.py

#4
opened by assile
Files changed (1)
  1. hugging_face/app.py +33 -37
hugging_face/app.py CHANGED
@@ -25,6 +25,9 @@ from matanyone_wrapper import matanyone
 from matanyone.utils.get_default_model import get_matanyone_model
 from matanyone.inference.inference_core import InferenceCore
 
+import warnings
+warnings.filterwarnings("ignore")
+
 def parse_augment():
     parser = argparse.ArgumentParser()
     parser.add_argument('--device', type=str, default=None)
@@ -121,7 +124,6 @@ def get_frames_from_video(video_input, video_state):
     except Exception as e:
         print(f"Audio extraction error: {str(e)}")
         audio_path = "" # Set to "" if extraction fails
-    # print(f'audio_path: {audio_path}')
 
     # extract frames
     try:
@@ -140,15 +142,15 @@ def get_frames_from_video(video_input, video_state):
         print("read_frame_source:{} error. {}\n".format(video_path, str(e)))
     image_size = (frames[0].shape[0],frames[0].shape[1])
 
-    # resize if resolution too big
-    if image_size[0]>=1280 and image_size[0]>=1280:
-        scale = 1080 / min(image_size)
-        new_w = int(image_size[1] * scale)
-        new_h = int(image_size[0] * scale)
-        # update frames
-        frames = [cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA) for f in frames]
-        # update image_size
-        image_size = (frames[0].shape[0],frames[0].shape[1])
+    # [remove for local demo] resize if resolution too big
+    # if image_size[0]>=1280 and image_size[0]>=1280:
+    #     scale = 1080 / min(image_size)
+    #     new_w = int(image_size[1] * scale)
+    #     new_h = int(image_size[0] * scale)
+    #     # update frames
+    #     frames = [cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA) for f in frames]
+    #     # update image_size
+    #     image_size = (frames[0].shape[0],frames[0].shape[1])
 
     # initialize video_state
     video_state = {
@@ -165,8 +167,7 @@ def get_frames_from_video(video_input, video_state):
     video_info = "Video Name: {},\nFPS: {},\nTotal Frames: {},\nImage Size:{}".format(video_state["video_name"], round(video_state["fps"], 0), len(frames), image_size)
     model.samcontroler.sam_controler.reset_image()
     model.samcontroler.sam_controler.set_image(video_state["origin_images"][0])
-    return video_state, video_info, video_state["origin_images"][0], \
-        gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
+    return video_state, video_info, video_state["origin_images"][0], gr.update(visible=True, maximum=len(frames), value=1), gr.update(visible=False, maximum=len(frames), value=len(frames)), \
         gr.update(visible=True), gr.update(visible=True), \
         gr.update(visible=True), gr.update(visible=True),\
         gr.update(visible=True), gr.update(visible=True), \
@@ -292,6 +293,7 @@ def image_matting(video_state, interactive_state, mask_dropdown, erode_kernel_si
     foreground, alpha = matanyone(matanyone_processor, following_frames, template_mask*255, r_erode=erode_kernel_size, r_dilate=dilate_kernel_size, n_warmup=refine_iter)
     foreground_output = Image.fromarray(foreground[-1])
    alpha_output = Image.fromarray(alpha[-1][:,:,0])
+
     return foreground_output, alpha_output
 
 # video matting
@@ -324,7 +326,7 @@ def video_matting(video_state, interactive_state, mask_dropdown, erode_kernel_si
 
     foreground_output = generate_video_from_frames(foreground, output_path="./results/{}_fg.mp4".format(video_state["video_name"]), fps=fps, audio_path=audio_path) # import video_input to name the output video
     alpha_output = generate_video_from_frames(alpha, output_path="./results/{}_alpha.mp4".format(video_state["video_name"]), fps=fps, gray2rgb=True, audio_path=audio_path) # import video_input to name the output video
-
+
     return foreground_output, alpha_output
 
 
@@ -409,38 +411,32 @@ sam_checkpoint_url_dict = {
     'vit_l': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth",
     'vit_b': "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
 }
-checkpoint_folder = os.path.join('/home/user/app/', 'pretrained_models')
+checkpoint_folder = os.path.join('..', 'pretrained_models')
 
 sam_checkpoint = load_file_from_url(sam_checkpoint_url_dict[args.sam_model_type], checkpoint_folder)
 # initialize sams
 model = MaskGenerator(sam_checkpoint, args)
 
 # initialize matanyone
-# load from ckpt
-# pretrain_model_url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0"
-# ckpt_path = load_file_from_url(os.path.join(pretrain_model_url, 'matanyone.pth'), checkpoint_folder)
-# matanyone_model = get_matanyone_model(ckpt_path, args.device)
-# load from Hugging Face
-from matanyone.model.matanyone import MatAnyone
-matanyone_model = MatAnyone.from_pretrained("PeiqingYang/MatAnyone")
-
+pretrain_model_url = "https://github.com/pq-yang/MatAnyone/releases/download/v1.0.0/matanyone.pth"
+ckpt_path = load_file_from_url(pretrain_model_url, checkpoint_folder)
+matanyone_model = get_matanyone_model(ckpt_path, args.device)
 matanyone_model = matanyone_model.to(args.device).eval()
-matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
+# matanyone_processor = InferenceCore(matanyone_model, cfg=matanyone_model.cfg)
 
 # download test samples
-media_url = "https://github.com/pq-yang/MatAnyone/releases/download/media/"
-test_sample_path = os.path.join('/home/user/app/hugging_face/', "test_sample/")
-load_file_from_url(os.path.join(media_url, 'test-sample0-720p.mp4'), test_sample_path)
-load_file_from_url(os.path.join(media_url, 'test-sample1-720p.mp4'), test_sample_path)
-load_file_from_url(os.path.join(media_url, 'test-sample2-720p.mp4'), test_sample_path)
-load_file_from_url(os.path.join(media_url, 'test-sample3-720p.mp4'), test_sample_path)
-load_file_from_url(os.path.join(media_url, 'test-sample0.jpg'), test_sample_path)
-load_file_from_url(os.path.join(media_url, 'test-sample1.jpg'), test_sample_path)
+test_sample_path = os.path.join('.', "test_sample/")
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample0-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample1-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample2-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample3-720p.mp4', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample0.jpg', test_sample_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/test-sample1.jpg', test_sample_path)
 
 # download assets
-assets_path = os.path.join('/home/user/app/hugging_face/', "assets/")
-load_file_from_url(os.path.join(media_url, 'tutorial_single_target.mp4'), assets_path)
-load_file_from_url(os.path.join(media_url, 'tutorial_multi_targets.mp4'), assets_path)
+assets_path = os.path.join('.', "assets/")
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/tutorial_single_target.mp4', assets_path)
+load_file_from_url('https://github.com/pq-yang/MatAnyone/releases/download/media/tutorial_multi_targets.mp4', assets_path)
 
 # documents
 title = r"""<div class="multi-layer" align="center"><span>MatAnyone</span></div>
@@ -574,11 +570,11 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=my_custom_css) as demo:
         with gr.Row():
             with gr.Column():
                 gr.Markdown("### Case 1: Single Target")
-                gr.Video(value="/home/user/app/hugging_face/assets/tutorial_single_target.mp4", elem_classes="video")
+                gr.Video(value="./assets/tutorial_single_target.mp4", elem_classes="video")
 
             with gr.Column():
                 gr.Markdown("### Case 2: Multiple Targets")
-                gr.Video(value="/home/user/app/hugging_face/assets/tutorial_multi_targets.mp4", elem_classes="video")
+                gr.Video(value="./assets/tutorial_multi_targets.mp4", elem_classes="video")
 
     with gr.Tabs():
         with gr.TabItem("Video"):
@@ -978,4 +974,4 @@ with gr.Blocks(theme=gr.themes.Monochrome(), css=my_custom_css) as demo:
     gr.Markdown(article)
 
     demo.queue()
-    demo.launch(debug=True)
+    demo.launch(share=True, debug=True)
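
Net effect of the change: the Space-specific absolute paths under /home/user/app/ become relative paths, the MatAnyone weights are fetched from the GitHub v1.0.0 release through get_matanyone_model instead of MatAnyone.from_pretrained on the Hugging Face Hub, and demo.launch(share=True, debug=True) additionally exposes a public Gradio share link for the local run. Because '..' (for pretrained_models) and '.' (for test_sample/ and assets/) resolve against the current working directory, the revised script presumably has to be started from inside the hugging_face/ folder. A minimal launch sketch under that assumption; the cuda:0 value is illustrative, --device being the flag defined in parse_augment:

    cd hugging_face
    python app.py --device cuda:0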