watchtowerss committed • commit 23d6e96 • 1 parent: 98d86dc

memory usage reduce for tracking

Files changed:
- app.py (+95 -45)
- inpainter/base_inpainter.py (+20 -4)
- track_anything.py (+21 -6)
- tracker/.DS_Store (+0 -0)
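The thrust of the commit is the same in all three files: decoded frames are no longer held in RAM as numpy arrays. Each frame is written as a PNG into a per-user folder under /tmp, and `video_state` keeps only file paths, so a frame occupies memory only while it is actually being processed. A minimal, self-contained sketch of that pattern (the /tmp layout and timestamp keying follow the diff; `save_frame` and `load_frame` are hypothetical stand-ins for the committed `save_image_to_userfolder` and `read_image_from_userfolder`):

```python
import os
import time
import cv2
import numpy as np

USER = str(time.time())  # the commit keys folders by upload timestamp

def save_frame(index: int, frame_bgr: np.ndarray, video_name: str = "demo.mp4") -> str:
    """Write one frame to disk and return its path instead of the array."""
    folder = "/tmp/{}/originimages/{}".format(USER, video_name)
    os.makedirs(folder, exist_ok=True)
    path = os.path.join(folder, "{:08d}.png".format(index))
    cv2.imwrite(path, frame_bgr)  # OpenCV expects BGR channel order
    return path

def load_frame(path: str) -> np.ndarray:
    """Decode a frame on demand, converting back to RGB for the models."""
    return cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)

frame_paths = [save_frame(i, np.zeros((4, 4, 3), np.uint8)) for i in range(3)]
print(frame_paths[0])  # e.g. /tmp/<timestamp>/originimages/demo.mp4/00000000.png
assert load_frame(frame_paths[0]).shape == (4, 4, 3)
```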
app.py
CHANGED
```diff
@@ -8,15 +8,14 @@ import sys
 sys.path.append(sys.path[0]+"/tracker")
 sys.path.append(sys.path[0]+"/tracker/model")
 from track_anything import TrackingAnything
-from track_anything import parse_augment
+from track_anything import parse_augment, save_image_to_userfolder, read_image_from_userfolder
 import requests
 import json
 import torchvision
 import torch
-from tools.interact_tools import SamControler
-from tracker.base_tracker import BaseTracker
 from tools.painter import mask_painter
 import psutil
+import time
 try:
     from mmcv.cnn import ConvModule
 except:
```
```diff
@@ -71,6 +70,7 @@ def get_prompt(click_state, click_input):
     return prompt


+
 # extract frames from upload video
 def get_frames_from_video(video_input, video_state):
     """
```
```diff
@@ -81,49 +81,72 @@ def get_frames_from_video(video_input, video_state):
     [[0:nearest_frame], [nearest_frame:], nearest_frame]
     """
     video_path = video_input
-    frames = []
+    frames = [] # save image path
+    user_name = time.time()
+    video_state["video_name"] = os.path.split(video_path)[-1]
+    video_state["user_name"] = user_name

+    os.makedirs(os.path.join("/tmp/{}/originimages/{}".format(video_state["user_name"], video_state["video_name"])), exist_ok=True)
+    os.makedirs(os.path.join("/tmp/{}/paintedimages/{}".format(video_state["user_name"], video_state["video_name"])), exist_ok=True)
     operation_log = [("",""),("Upload video already. Try click the image for adding targets to track and inpaint.","Normal")]
     try:
         cap = cv2.VideoCapture(video_path)
         fps = cap.get(cv2.CAP_PROP_FPS)
+        if not cap.isOpened():
+            operation_log = [("No frames extracted, please input video file with '.mp4.' '.mov'.", "Error")]
+            print("No frames extracted, please input video file with '.mp4.' '.mov'.")
+            return None, None, None, None, \
+                None, None, None, None, \
+                None, None, None, None, \
+                None, None, gr.update(visible=True, value=operation_log)
+        image_index = 0
         while cap.isOpened():
             ret, frame = cap.read()
             if ret == True:
                 current_memory_usage = psutil.virtual_memory().percent
-                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-                if current_memory_usage > 90:
-                    operation_log = [("Memory usage is too high (>90%). Stop the video extraction. Please reduce the video resolution or frame rate.", "Error")]
-                    print("Memory usage is too high (>90%). Please reduce the video resolution or frame rate.")
+
+                # try solve memory usage problem, save image to disk instead of memory
+                frames.append(save_image_to_userfolder(video_state, image_index, frame, True))
+                image_index +=1
+                # frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+                if current_memory_usage > 90:
+                    operation_log = [("Memory usage is too high (>90%). Stop the video extraction. Please reduce the video resolution or frame rate.", "Error")]
+                    print("Memory usage is too high (>90%). Please reduce the video resolution or frame rate.")
                     break
            else:
                 break
+
     except (OSError, TypeError, ValueError, KeyError, SyntaxError) as e:
+        # except:
+        operation_log = [("read_frame_source:{} error. {}\n".format(video_path, str(e)), "Error")]
         print("read_frame_source:{} error. {}\n".format(video_path, str(e)))
-    image_size = (frames[0].shape[0],frames[0].shape[1])
+        return None, None, None, None, \
+            None, None, None, None, \
+            None, None, None, None, \
+            None, None, gr.update(visible=True, value=operation_log)
+    first_image = read_image_from_userfolder(frames[0])
+    image_size = (first_image.shape[0], first_image.shape[1])
     # initialize video_state
     video_state = {
+        "user_name": user_name,
         "video_name": os.path.split(video_path)[-1],
         "origin_images": frames,
         "painted_images": frames.copy(),
-        "masks": [np.zeros((image_size[0],image_size[1]), np.uint8)]*len(frames),
+        "masks": [np.zeros((image_size[0], image_size[1]), np.uint8)]*len(frames),
         "logits": [None]*len(frames),
         "select_frame_number": 0,
         "fps": fps
     }
     video_info = "Video Name: {}, FPS: {}, Total Frames: {}, Image Size:{}".format(video_state["video_name"], video_state["fps"], len(frames), image_size)
     model.samcontroler.sam_controler.reset_image()
-    model.samcontroler.sam_controler.set_image(video_state["origin_images"][0])
-    return video_state, video_info, video_state["origin_images"][0], gr.update(visible=True, maximum=len(frames), value=1), \
-                        gr.update(visible=True, maximum=len(frames), value=len(frames)), \
-                        gr.update(visible=True), gr.update(visible=True), \
-                        gr.update(visible=True), gr.update(visible=True), \
-                        gr.update(visible=True), gr.update(visible=True), \
-                        gr.update(visible=True), gr.update(visible=True), \
-                        gr.update(visible=True, value=operation_log)
+    model.samcontroler.sam_controler.set_image(first_image)
+    return video_state, video_info, first_image, gr.update(visible=True, maximum=len(frames), value=1), \
+        gr.update(visible=True, maximum=len(frames), value=len(frames)), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), \
+        gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), \
+        gr.update(visible=True), gr.update(visible=True), gr.update(visible=True, value=operation_log),

 def run_example(example):
-    return
+    return example
 # get the select frame from gradio slider
 def select_template(image_selection_slider, video_state, interactive_state):

```
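Two guards shape the new extraction loop: an early return when OpenCV cannot open the container at all, and the existing psutil check that aborts extraction once system RAM passes 90%. The same logic as a stand-alone generator (`frames_or_bail` is a hypothetical name; the 90% threshold is the diff's):

```python
import cv2
import psutil

def frames_or_bail(video_path: str, max_mem_percent: float = 90.0):
    """Yield BGR frames, stopping early if system RAM crosses the guard threshold."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():  # mirrors the new early-return branch
        raise IOError("No frames extracted: " + video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            yield frame
            # psutil.virtual_memory().percent is system-wide usage, 0-100
            if psutil.virtual_memory().percent > max_mem_percent:
                break
    finally:
        cap.release()
```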
```diff
@@ -134,21 +157,22 @@ def select_template(image_selection_slider, video_state, interactive_state):
     # once select a new template frame, set the image in sam

     model.samcontroler.sam_controler.reset_image()
-    model.samcontroler.sam_controler.set_image(video_state["origin_images"][image_selection_slider])
+    model.samcontroler.sam_controler.set_image(read_image_from_userfolder(video_state["origin_images"][image_selection_slider]))

     # update the masks when select a new template frame
     # if video_state["masks"][image_selection_slider] is not None:
     #     video_state["painted_images"][image_selection_slider] = mask_painter(video_state["origin_images"][image_selection_slider], video_state["masks"][image_selection_slider])
     operation_log = [("",""), ("Select frame {}. Try click image and add mask for tracking.".format(image_selection_slider),"Normal")]

-    return video_state["painted_images"][image_selection_slider], video_state, interactive_state, operation_log
+    return read_image_from_userfolder(video_state["painted_images"][image_selection_slider]), video_state, interactive_state, operation_log

 # set the tracking end frame
 def get_end_number(track_pause_number_slider, video_state, interactive_state):
+    track_pause_number_slider -= 1
     interactive_state["track_end_number"] = track_pause_number_slider
     operation_log = [("",""),("Set the tracking finish at frame {}".format(track_pause_number_slider),"Normal")]

-    return video_state["painted_images"][track_pause_number_slider],interactive_state, operation_log
+    return read_image_from_userfolder(video_state["painted_images"][track_pause_number_slider]),interactive_state, operation_log

 def get_resize_ratio(resize_ratio_slider, interactive_state):
     interactive_state["resize_ratio"] = resize_ratio_slider
```
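The one-line `track_pause_number_slider -= 1` fixes an off-by-one: the slider is 1-based (`minimum=1`, updated to `maximum=len(frames)`) while the frame lists are 0-based, so without the decrement the last slider position would index one past the end of `painted_images`. A toy check of the mapping (`slider_to_index` is a hypothetical helper):

```python
def slider_to_index(slider_value: int) -> int:
    """Map a 1-based slider position to a 0-based list index."""
    return slider_value - 1

frames = ["f0", "f1", "f2"]
assert frames[slider_to_index(3)] == "f2"  # last slider position, last frame
```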
```diff
@@ -172,18 +196,18 @@ def sam_refine(video_state, point_prompt, click_state, interactive_state, evt:gr.SelectData):

     # prompt for sam model
     model.samcontroler.sam_controler.reset_image()
-    model.samcontroler.sam_controler.set_image(video_state["origin_images"][video_state["select_frame_number"]])
+    model.samcontroler.sam_controler.set_image(read_image_from_userfolder(video_state["origin_images"][video_state["select_frame_number"]]))
     prompt = get_prompt(click_state=click_state, click_input=coordinate)

     mask, logit, painted_image = model.first_frame_click(
-        image=video_state["origin_images"][video_state["select_frame_number"]],
+        image=read_image_from_userfolder(video_state["origin_images"][video_state["select_frame_number"]]),
         points=np.array(prompt["input_point"]),
         labels=np.array(prompt["input_label"]),
         multimask=prompt["multimask_output"],
     )
     video_state["masks"][video_state["select_frame_number"]] = mask
     video_state["logits"][video_state["select_frame_number"]] = logit
-    video_state["painted_images"][video_state["select_frame_number"]] = painted_image
+    video_state["painted_images"][video_state["select_frame_number"]] = save_image_to_userfolder(video_state, index=video_state["select_frame_number"], image=cv2.cvtColor(np.asarray(painted_image),cv2.COLOR_BGR2RGB),type=False)

     operation_log = [("",""), ("Use SAM for segment. You can try add positive and negative points by clicking. Or press Clear clicks button to refresh the image. Press Add mask button when you are satisfied with the segment","Normal")]
     return painted_image, video_state, interactive_state, operation_log
```
```diff
@@ -203,7 +227,7 @@ def add_multi_mask(video_state, interactive_state, mask_dropdown):

 def clear_click(video_state, click_state):
     click_state = [[],[]]
-    template_frame = video_state["origin_images"][video_state["select_frame_number"]]
+    template_frame = read_image_from_userfolder(video_state["origin_images"][video_state["select_frame_number"]])
     operation_log = [("",""), ("Clear points history and refresh the image.","Normal")]
     return template_frame, click_state, operation_log

```
```diff
@@ -216,7 +240,7 @@ def remove_multi_mask(interactive_state, mask_dropdown):

 def show_mask(video_state, interactive_state, mask_dropdown):
     mask_dropdown.sort()
-    select_frame = video_state["origin_images"][video_state["select_frame_number"]]
+    select_frame = read_image_from_userfolder(video_state["origin_images"][video_state["select_frame_number"]])

     for i in range(len(mask_dropdown)):
         mask_number = int(mask_dropdown[i].split("_")[1]) - 1
```
```diff
@@ -253,18 +277,18 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown):
         template_mask[0][0]=1
         operation_log = [("Error! Please add at least one mask to track by clicking the left image.","Error"), ("","")]
         # return video_output, video_state, interactive_state, operation_error
-    masks, logits, painted_images = model.generator(images=following_frames, template_mask=template_mask)
+    masks, logits, painted_images_path = model.generator(images=following_frames, template_mask=template_mask, video_state=video_state)
     # clear GPU memory
     model.xmem.clear_memory()

     if interactive_state["track_end_number"]:
         video_state["masks"][video_state["select_frame_number"]:interactive_state["track_end_number"]] = masks
         video_state["logits"][video_state["select_frame_number"]:interactive_state["track_end_number"]] = logits
-        video_state["painted_images"][video_state["select_frame_number"]:interactive_state["track_end_number"]] = painted_images
+        video_state["painted_images"][video_state["select_frame_number"]:interactive_state["track_end_number"]] = painted_images_path
     else:
         video_state["masks"][video_state["select_frame_number"]:] = masks
         video_state["logits"][video_state["select_frame_number"]:] = logits
-        video_state["painted_images"][video_state["select_frame_number"]:] = painted_images
+        video_state["painted_images"][video_state["select_frame_number"]:] = painted_images_path

     video_output = generate_video_from_frames(video_state["painted_images"], output_path="./result/track/{}".format(video_state["video_name"]), fps=fps) # import video_input to name the output video
     interactive_state["inference_times"] += 1
```
```diff
@@ -283,20 +307,16 @@ def vos_tracking_video(video_state, interactive_state, mask_dropdown):
     for mask in video_state["masks"]:
         np.save(os.path.join('./result/mask/{}'.format(video_state["video_name"].split('.')[0]), '{:05d}.npy'.format(i)), mask)
         i+=1
-    # save_mask(video_state["masks"], video_state["video_name"])
     #### shanggao code for mask save
     return video_output, video_state, interactive_state, operation_log

-
-# def extract_sole_mask(video_state, mask_dropdown):
-#     combined_masks =
-#     unique_masks = np.unique(combined_masks)
-#     return 0
+

 # inpaint
 def inpaint_video(video_state, interactive_state, mask_dropdown):
     operation_log = [("",""), ("Removed the selected masks.","Normal")]

+    # solve memory
     frames = np.asarray(video_state["origin_images"])
     fps = video_state["fps"]
     inpaint_masks = np.asarray(video_state["masks"])
```
```diff
@@ -319,13 +339,39 @@ def inpaint_video(video_state, interactive_state, mask_dropdown):
     except:
         operation_log = [("Error! You are trying to inpaint without masks input. Please track the selected mask first, and then press inpaint. If VRAM exceeded, please use the resize ratio to scaling down the image size.","Error"), ("","")]
         inpainted_frames = video_state["origin_images"]
-    video_output = generate_video_from_frames(inpainted_frames, output_path="./result/inpaint/{}".format(video_state["video_name"]), fps=fps) # import video_input to name the output video
-
+    video_output = generate_video_from_frames(inpainted_frames, output_path="./result/inpaint/{}".format(video_state["video_name"]), fps=fps) # import video_input to name the output video
+    video_output = generate_video_from_paintedframes(inpainted_frames, output_path="./result/inpaint/{}".format(video_state["video_name"]), fps=fps)
     return video_output, operation_log


 # generate video after vos inference
-def generate_video_from_frames(frames, output_path, fps=30):
+def generate_video_from_frames(frames_path, output_path, fps=30):
+    """
+    Generates a video from a list of frames.
+
+    Args:
+        frames (list of numpy arrays): The frames to include in the video.
+        output_path (str): The path to save the generated video.
+        fps (int, optional): The frame rate of the output video. Defaults to 30.
+    """
+    # height, width, layers = frames[0].shape
+    # fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    # video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+    # print(output_path)
+    # for frame in frames:
+    #     video.write(frame)
+
+    # video.release()
+    frames = []
+    for file in frames_path:
+        frames.append(read_image_from_userfolder(file))
+    frames = torch.from_numpy(np.asarray(frames))
+    if not os.path.exists(os.path.dirname(output_path)):
+        os.makedirs(os.path.dirname(output_path))
+    torchvision.io.write_video(output_path, frames, fps=fps, video_codec="libx264")
+    return output_path
+
+def generate_video_from_paintedframes(frames, output_path, fps=30):
     """
     Generates a video from a list of frames.

```
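The rewritten `generate_video_from_frames` takes paths, decodes each PNG back to RGB, stacks the result into a (T, H, W, C) uint8 tensor, and writes it with `torchvision.io.write_video`. A self-contained sketch with synthetic frames (`write_video` needs the PyAV backend, i.e. `pip install av`; the output path here is made up):

```python
import os
import numpy as np
import torch
import torchvision

def write_clip(output_path: str, fps: int = 30) -> str:
    # 12 synthetic RGB frames, shape (T, H, W, C), dtype uint8
    frames = torch.from_numpy(
        np.random.randint(0, 256, (12, 64, 64, 3), dtype=np.uint8)
    )
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # libx264 matches the codec the commit selects
    torchvision.io.write_video(output_path, frames, fps=fps, video_codec="libx264")
    return output_path

print(write_clip("./result/demo/clip.mp4"))
```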
@@ -375,8 +421,8 @@ folder ="./checkpoints"
|
|
375 |
SAM_checkpoint = download_checkpoint(sam_checkpoint_url, folder, sam_checkpoint)
|
376 |
xmem_checkpoint = download_checkpoint(xmem_checkpoint_url, folder, xmem_checkpoint)
|
377 |
e2fgvi_checkpoint = download_checkpoint_from_google_drive(e2fgvi_checkpoint_id, folder, e2fgvi_checkpoint)
|
378 |
-
# args.port =
|
379 |
-
# args.device = "cuda:
|
380 |
# args.mask_save = True
|
381 |
|
382 |
# initialize sam, xmem, e2fgvi models
|
```diff
@@ -409,6 +455,7 @@ with gr.Blocks() as iface:

     video_state = gr.State(
         {
+        "user_name": "",
         "video_name": "",
         "origin_images": None,
         "painted_images": None,
```
```diff
@@ -458,7 +505,7 @@ with gr.Blocks() as iface:
                 track_pause_number_slider = gr.Slider(minimum=1, maximum=100, step=1, value=1, label="Track end frames", visible=False)

             with gr.Column():
-                run_status = gr.HighlightedText(value=[("Text","Error"),("to be","Label 2"),("highlighted","Label 3")], visible=False)
+                run_status = gr.HighlightedText(value=[("Text","Error"),("to be","Label 2"),("highlighted","Label 3")], visible=True)
                 mask_dropdown = gr.Dropdown(multiselect=True, value=[], label="Mask selection", info=".", visible=False)
                 video_output = gr.Video(autosize=True, visible=False).style(height=360)
                 with gr.Row():
```
```diff
@@ -471,9 +518,10 @@ with gr.Blocks() as iface:
         inputs=[
            video_input, video_state
         ],
-        outputs=[video_state, video_info, template_frame,
-
-                 tracking_video_predict_button, video_output, mask_dropdown,
+        outputs=[video_state, video_info, template_frame, image_selection_slider,
+                 track_pause_number_slider,point_prompt, clear_button_click, Add_mask_button,
+                 template_frame, tracking_video_predict_button, video_output, mask_dropdown,
+                 remove_mask_button, inpaint_video_predict_button, run_status]
     )

     # second step: select images from slider
```
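Gradio binds return values to `outputs` positionally, one per component, so the widened list must match the fifteen values `get_frames_from_video` now returns on success; both error branches likewise return fifteen values (fourteen `None`s plus the `run_status` update). A minimal illustration of that contract (generic components, not the app's):

```python
import gradio as gr

with gr.Blocks() as demo:
    inp = gr.Textbox(label="input")
    out_a = gr.Textbox(label="upper")
    out_b = gr.HighlightedText(label="status")

    def handler(text):
        # one return value per output component, in order
        return text.upper(), [("ok", "Normal")]

    inp.submit(handler, inputs=[inp], outputs=[out_a, out_b])

demo.launch()
```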
```diff
@@ -532,6 +580,8 @@ with gr.Blocks() as iface:
     video_input.clear(
         lambda: (
         {
+        "user_name": "",
+        "video_name": "",
         "origin_images": None,
         "painted_images": None,
         "masks": None,
```
inpainter/base_inpainter.py
CHANGED
```diff
@@ -1,17 +1,28 @@
 import os
 import glob
 from PIL import Image
-
 import torch
 import yaml
 import cv2
 import importlib
 import numpy as np
 from tqdm import tqdm
-
 from inpainter.util.tensor_util import resize_frames, resize_masks

-
+def read_image_from_userfolder(image_path):
+    # if type:
+    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
+    # else:
+    #     image = cv2.cvtColor(cv2.imread("/tmp/{}/paintedimages/{}/{:08d}.png".format(username, video_state["video_name"], index+ ".png")), cv2.COLOR_BGR2RGB)
+    return image
+
+def save_image_to_userfolder(video_state, index, image, type:bool):
+    if type:
+        image_path = "/tmp/{}/originimages/{}/{:08d}.png".format(video_state["user_name"], video_state["video_name"], index)
+    else:
+        image_path = "/tmp/{}/paintedimages/{}/{:08d}.png".format(video_state["user_name"], video_state["video_name"], index)
+    cv2.imwrite(image_path, image)
+    return image_path
 class BaseInpainter:
     def __init__(self, E2FGVI_checkpoint, device) -> None:
         """
```
```diff
@@ -46,7 +57,7 @@ class BaseInpainter:
                 ref_index.append(i)
         return ref_index

-    def inpaint(self, frames, masks, dilate_radius=15, ratio=1):
+    def inpaint(self, frames_path, masks, dilate_radius=15, ratio=1):
         """
         frames: numpy array, T, H, W, 3
         masks: numpy array, T, H, W
```
```diff
@@ -56,6 +67,11 @@ class BaseInpainter:
         Output:
         inpainted_frames: numpy array, T, H, W, 3
         """
+        frames = []
+        for file in frames_path:
+            frames.append(read_image_from_userfolder(file))
+        frames = np.asarray(frames)
+
         assert frames.shape[:3] == masks.shape, 'different size between frames and masks'
         assert ratio > 0 and ratio <= 1, 'ratio must in (0, 1]'
         masks = masks.copy()
```
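One asymmetry worth noting: `inpaint` still decodes every stored path and stacks the result with `np.asarray`, so the disk-backed store defers rather than removes the peak cost here; the commit title's "for tracking" is accurate. Back-of-the-envelope footprint of materializing a clip as uint8 RGB:

```python
# T frames of H x W RGB uint8 cost T*H*W*3 bytes once stacked into one array.
t, h, w = 1000, 1080, 1920
print("{:.1f} GiB".format(t * h * w * 3 / 2**30))  # ~5.8 GiB for 1000 frames of 1080p
```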
track_anything.py
CHANGED
```diff
@@ -6,9 +6,22 @@ from tracker.base_tracker import BaseTracker
 from inpainter.base_inpainter import BaseInpainter
 import numpy as np
 import argparse
+import cv2

+def read_image_from_userfolder(image_path):
+    # if type:
+    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
+    # else:
+    #     image = cv2.cvtColor(cv2.imread("/tmp/{}/paintedimages/{}/{:08d}.png".format(username, video_state["video_name"], index+ ".png")), cv2.COLOR_BGR2RGB)
+    return image

-
+def save_image_to_userfolder(video_state, index, image, type:bool):
+    if type:
+        image_path = "/tmp/{}/originimages/{}/{:08d}.png".format(video_state["user_name"], video_state["video_name"], index)
+    else:
+        image_path = "/tmp/{}/paintedimages/{}/{:08d}.png".format(video_state["user_name"], video_state["video_name"], index)
+    cv2.imwrite(image_path, image)
+    return image_path
 class TrackingAnything():
     def __init__(self, sam_checkpoint, xmem_checkpoint, e2fgvi_checkpoint, args):
         self.args = args
```
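The helper pair is duplicated here and in `inpainter/base_inpainter.py` rather than imported from one place, presumably to avoid a circular import. Both write with `cv2.imwrite` and read with `cv2.imread`, which work in BGR order, so the RGB arrays used by the models are channel-swapped on the way in and out (`COLOR_BGR2RGB` and `COLOR_RGB2BGR` are the same swap). Since PNG is lossless, the round trip is exact, which a quick check confirms:

```python
import cv2
import numpy as np

rgb = np.random.randint(0, 256, (8, 8, 3), dtype=np.uint8)
cv2.imwrite("/tmp/roundtrip.png", cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
back = cv2.cvtColor(cv2.imread("/tmp/roundtrip.png"), cv2.COLOR_BGR2RGB)
assert np.array_equal(rgb, back)  # lossless, so no drift frame to frame
```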
```diff
@@ -39,23 +52,25 @@ class TrackingAnything():
         # mask, logit, painted_image = self.samcontroler.interact_loop(image, same_image_flag, points, labels, logits, multimask)
         # return mask, logit, painted_image

-    def generator(self, images: list, template_mask:np.ndarray):
+    def generator(self, images: list, template_mask:np.ndarray, video_state:dict):

         masks = []
         logits = []
         painted_images = []
         for i in tqdm(range(len(images)), desc="Tracking image"):
             if i ==0:
-                mask, logit, painted_image = self.xmem.track(images[i], template_mask)
+                mask, logit, painted_image = self.xmem.track(read_image_from_userfolder(images[i]), template_mask)
                 masks.append(mask)
                 logits.append(logit)
-                painted_images.append(painted_image)
+                # painted_images.append(painted_image)
+                painted_images.append(save_image_to_userfolder(video_state, index=i, image=cv2.cvtColor(np.asarray(painted_image),cv2.COLOR_BGR2RGB), type=False))

             else:
-                mask, logit, painted_image = self.xmem.track(images[i])
+                mask, logit, painted_image = self.xmem.track(read_image_from_userfolder(images[i]))
                 masks.append(mask)
                 logits.append(logit)
-                painted_images.append(painted_image)
+                # painted_images.append(painted_image)
+                painted_images.append(save_image_to_userfolder(video_state, index=i, image=cv2.cvtColor(np.asarray(painted_image),cv2.COLOR_BGR2RGB), type=False))
         return masks, logits, painted_images

```
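The reworked generator is where the memory saving actually lands: `images` is now a list of paths, only the frame currently being tracked is decoded, and each painted result is flushed straight back to disk, so the steady-state footprint is one frame rather than the whole clip. A stripped-down model of the loop (`track` is a stand-in for `self.xmem.track`; the `_painted.png` naming is made up):

```python
import cv2
import numpy as np

def track(frame_rgb, template_mask=None):
    """Stand-in for xmem.track: returns a dummy mask and painted frame."""
    mask = np.zeros(frame_rgb.shape[:2], np.uint8)
    return mask, frame_rgb

def run_tracking(frame_paths, template_mask):
    masks, painted_paths = [], []
    for i, path in enumerate(frame_paths):
        frame = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)  # decode on demand
        mask, painted = track(frame, template_mask if i == 0 else None)
        masks.append(mask)
        out = path.replace(".png", "_painted.png")                  # hypothetical layout
        cv2.imwrite(out, cv2.cvtColor(painted, cv2.COLOR_RGB2BGR))  # flush to disk
        painted_paths.append(out)
    return masks, painted_paths
```

Note that `masks` and `logits` still accumulate in memory across the clip; only the origin and painted image lists moved to disk.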
tracker/.DS_Store
CHANGED
Binary files a/tracker/.DS_Store and b/tracker/.DS_Store differ