'''
    This file implements the whole dataset curation pipeline: it collects the least compressed
    and the most informative frames from the video sources.
'''
import os, time, sys
import shutil
import cv2
import torch
import argparse

# Import files from the local folder
root_path = os.path.abspath('.')
sys.path.append(root_path)
from opt import opt
from dataset_curation_pipeline.IC9600.gene import infer_one_image
from dataset_curation_pipeline.IC9600.ICNet import ICNet


class video_scoring:
    def __init__(self, IC9600_pretrained_weight_path, verbose) -> None:

        # Init the IC9600 image-complexity scoring model
        self.scorer = ICNet()
        self.scorer.load_state_dict(torch.load(IC9600_pretrained_weight_path, map_location=torch.device('cpu')))
        self.scorer.eval().cuda()
        self.verbose = verbose


    def select_frame(self, skip_num, img_lists, target_frame_num, save_dir, output_name_head, partition_idx):
        ''' Score all I-Frames in img_lists and save the target_frame_num top-scored ones
        Args:
            skip_num (int):         Only 1 in every skip_num frames is scored, to accelerate.
            img_lists (list):       Paths of all image files we want to process
            target_frame_num (int): The number of frames we need to choose
            save_dir (str):         The folder where we save the selected images
            output_name_head (str): The input video name head (filename without extension)
            partition_idx (int):    The partition index
        '''

        stores = []
        for idx, image_path in enumerate(sorted(img_lists)):
            if idx % skip_num != 0:
                # Only process 1 in every skip_num frames to accelerate and to reduce repeated scenes.
                continue

            # Evaluate the image complexity score for this image
            score = infer_one_image(self.scorer, image_path)
            stores.append((score, image_path))

            if self.verbose:
                print(image_path, score)

        # Keep the images with the highest complexity scores
        stores.sort(key=lambda x: x[0])
        selected = stores[-target_frame_num:]
        if self.verbose:
            print("The lowest selected score is", selected[0][0])

        # Store the selected images
        for idx, (score, img_path) in enumerate(selected):
            output_name = output_name_head + "_" + str(partition_idx) + "_" + str(idx) + ".png"
            output_path = os.path.join(save_dir, output_name)

            shutil.copyfile(img_path, output_path)


    def run(self, skip_num, img_folder, target_frame_num, save_dir, output_name_head, partition_num):
        ''' Score all I-Frames in img_folder and select target_frame_num of them
        Args:
            skip_num (int):         Only 1 in every skip_num frames is scored, to accelerate.
            img_folder (str):       The folder of all I-Frames we need to process
            target_frame_num (int): The number of frames we need to choose
            save_dir (str):         The folder where we save the selected images
            output_name_head (str): The input video name head (filename without extension)
            partition_num (int):    The number of partitions we split the video into
        '''
        assert target_frame_num % partition_num == 0, "target_frame_num must be divisible by partition_num"

        img_lists = []
        for img_name in sorted(os.listdir(img_folder)):
            path = os.path.join(img_folder, img_name)
            img_lists.append(path)

        length = len(img_lists)
        unit_length = length // partition_num
        target_partition_num = target_frame_num // partition_num

        # Cut the frames into several partitions and select the highest-scored frames in each
        for idx in range(partition_num):
            select_lists = img_lists[unit_length*idx : unit_length*(idx+1)]
            self.select_frame(skip_num, select_lists, target_partition_num, save_dir, output_name_head, idx)


class frame_collector:
    def __init__(self, IC9600_pretrained_weight_path, verbose) -> None:

        self.scoring = video_scoring(IC9600_pretrained_weight_path, verbose)
        self.verbose = verbose


    def video_split_by_IFrame(self, video_path, tmp_path):
        ''' Split the video into its I-Frames
        Args:
            video_path (str):   The path to a single video
            tmp_path (str):     A temporary working folder; it will be deleted at the end
        '''

        # Prepare the working folder
        if os.path.exists(tmp_path):
            shutil.rmtree(tmp_path)
        os.makedirs(tmp_path)

        # Extract the I-Frames with ffmpeg (image-%06d.png supports up to 999,999 I-Frames per video)
        cmd = "ffmpeg -i \"" + video_path + "\" -loglevel error -vf select='eq(pict_type\\,I)' -vsync 2 -f image2 -q:v 1 " + tmp_path + "/image-%06d.png"
        if self.verbose:
            print(cmd)
        os.system(cmd)


    def collect_frames(self, video_folder_dir, save_dir, tmp_path, skip_num, target_frames, partition_num):
        ''' Automatically collect frames from all videos under video_folder_dir
        Args:
            video_folder_dir (str):     The folder of all input videos
            save_dir (str):             The folder where we store the selected frames
            tmp_path (str):             A temporary working folder; it will be deleted at the end
            skip_num (int):             Only 1 in every skip_num frames is scored, to accelerate.
            target_frames (list):       [# of frames for a video under 30 min, # of frames for a video over 30 min]
            partition_num (int):        The number of partitions we split the video into
        '''

        # Iterate over all videos under video_folder_dir
        for video_name in sorted(os.listdir(video_folder_dir)):
            # Sanity check on the video file format
            output_name_head, extension = os.path.splitext(video_name)
            if extension.lower() not in ['.mp4', '.mkv']:
                continue

            # Get the duration of this video
            video_path = os.path.join(video_folder_dir, video_name)
            duration = get_duration(video_path)     # unit in minutes
            print("We are processing " + video_path + " with duration " + str(duration) + " min")

            # Split the video into I-Frames
            self.video_split_by_IFrame(video_path, tmp_path)

            # Score the frames and select the top-scored frames we need
            if duration <= 30:
                target_frame_num = target_frames[0]
            else:
                target_frame_num = target_frames[1]
            self.scoring.run(skip_num, tmp_path, target_frame_num, save_dir, output_name_head, partition_num)

            # tmp_path is recreated for the next video and removed at the very end of the script


def get_duration(filename):
    ''' Return the video duration in whole minutes, computed from fps and frame count '''
    video = cv2.VideoCapture(filename)
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_count = video.get(cv2.CAP_PROP_FRAME_COUNT)
    video.release()
    seconds = frame_count / fps
    minutes = int(seconds / 60)
    return minutes


if __name__ == "__main__":
    # Fundamental setting
    parser = argparse.ArgumentParser()
    parser.add_argument('--video_folder_dir', type=str, default='../anime_videos', help="A folder with video sources")
    parser.add_argument('--IC9600_pretrained_weight_path', type=str, default="pretrained/ck.pth", help="The pretrained IC9600 weight")
    parser.add_argument('--save_dir', type=str, default='APISR_dataset', help="The folder to store the filtered dataset")
    parser.add_argument('--skip_num', type=int, default=5, help="Only 1 in skip_num sequential I-frames will be scored, to accelerate.")
    parser.add_argument('--target_frames', type=int, nargs=2, default=[16, 24], help="[# of frames for a video under 30 min, # of frames for a video over 30 min]")
    parser.add_argument('--partition_num', type=int, default=8, help="The number of partitions we split the video into, to increase sampling diversity")
    parser.add_argument('--verbose', action=argparse.BooleanOptionalAction, default=True, help="Whether we print log messages")
    args = parser.parse_args()

    # Transform to variables
    video_folder_dir = args.video_folder_dir
    IC9600_pretrained_weight_path = args.IC9600_pretrained_weight_path
    save_dir = args.save_dir
    skip_num = args.skip_num
    target_frames = args.target_frames      # [# of frames for a video under 30 min, # of frames for a video over 30 min]
    partition_num = args.partition_num
    verbose = args.verbose

    # Secondary setting
    tmp_path = "tmp_dataset"

    # Prepare the output folder
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    # Process
    start = time.time()
    obj = frame_collector(IC9600_pretrained_weight_path, verbose)
    obj.collect_frames(video_folder_dir, save_dir, tmp_path, skip_num, target_frames, partition_num)
    total_time = int((time.time() - start) // 60)
    print("Total time spent is {} min".format(total_time))

    shutil.rmtree(tmp_path)
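
# Example invocation. This is an illustrative sketch only: the script filename is a placeholder
# for wherever this file lives in the repository, and the argument values simply mirror the
# argparse defaults above (--target_frames takes two integers: under-30-min and over-30-min counts).
#
#   python <this_file>.py \
#       --video_folder_dir ../anime_videos \
#       --IC9600_pretrained_weight_path pretrained/ck.pth \
#       --save_dir APISR_dataset \
#       --skip_num 5 --target_frames 16 24 --partition_num 8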