Spaces:
Runtime error
Runtime error
File size: 3,865 Bytes
e7d5680 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
from multiprocessing import Pool
from mmengine.logging import MMLogger
from scenedetect import ContentDetector, detect
from tqdm import tqdm
from opensora.utils.misc import get_timestamp
from .utils import check_mp4_integrity, clone_folder_structure, iterate_files, split_video
# Clip-cutting configuration. The values are bundled into ``cfg`` below and
# forwarded as keyword arguments to ``split_video``.
target_fps: int = 30
shorter_size: int = 512
min_seconds: float = 1
max_seconds: float = 5

# Sanity check: the allowed clip-duration window must be non-empty.
assert min_seconds < max_seconds

cfg = {
    "target_fps": target_fps,
    "min_seconds": min_seconds,
    "max_seconds": max_seconds,
    "shorter_size": shorter_size,
}
def process_folder(root_src, root_dst):
    """Detect scenes in every ``.mp4`` under ``root_src`` and cut them into clips.

    A log file named ``<basename of root_dst>_<timestamp>.log`` is created next
    to ``root_dst`` (i.e. in its parent directory). The folder structure of
    ``root_src`` is cloned into ``root_dst`` and each source video that passes
    the integrity check is split with the module-level ``cfg`` settings.

    Args:
        root_src: Root folder that is searched for source ``.mp4`` files.
        root_dst: Root folder that receives the generated clips.
    """
    # create logger
    # Normalize before splitting: with a trailing separator
    # os.path.basename() returns "", which would give an empty logger name and
    # put the log file inside root_dst before that folder has been created.
    dst_normalized = os.path.normpath(root_dst)
    folder_path_log = os.path.dirname(dst_normalized)
    log_name = os.path.basename(dst_normalized)
    timestamp = get_timestamp()
    log_path = os.path.join(folder_path_log, f"{log_name}_{timestamp}.log")
    logger = MMLogger.get_instance(log_name, log_file=log_path)

    # clone folder structure
    clone_folder_structure(root_src, root_dst)

    # all source videos, in a deterministic order
    mp4_list = sorted(x for x in iterate_files(root_src) if x.endswith(".mp4"))

    # Hand tqdm the list itself (not an enumerate() wrapper) so it can read
    # len() and display a total / ETA.
    for sample_path in tqdm(mp4_list):
        folder_src = os.path.dirname(sample_path)
        # mirror the source sub-folder underneath root_dst
        folder_dst = os.path.join(root_dst, os.path.relpath(folder_src, root_src))

        # check src video integrity; skip videos that fail the check
        if not check_mp4_integrity(sample_path, logger=logger):
            continue

        # detect scenes
        scene_list = detect(sample_path, ContentDetector(), start_in_scene=True)

        # split scenes
        save_path_list = split_video(sample_path, scene_list, save_dir=folder_dst, **cfg, logger=logger)

        # check integrity of generated clips (the result is not acted upon
        # here; presumably the helper reports problems via the logger)
        for clip_path in save_path_list:
            check_mp4_integrity(clip_path, logger=logger)
def scene_detect():
    """Detect and cut scenes using a single process.

    Expected dataset structure::

        data/
            your_dataset/
                raw_videos/
                    xxx.mp4
                    yyy.mp4

    After running, clips are written next to the raw videos, e.g.::

        data/
            your_dataset/
                raw_videos/
                    xxx.mp4
                    yyy.mp4
                clips/
                    xxx_scene-0.mp4
                    yyy_scene-0.mp4
                    yyy_scene-1.mp4
    """
    # TODO: specify your dataset root
    src_root = "./data/your_dataset/raw_videos"
    dst_root = "./data/your_dataset/clips"

    process_folder(src_root, dst_root)
def scene_detect_mp():
    """Detect and cut scenes using multiple processes, one per split.

    Expected dataset structure::

        data/
            your_dataset/
                raw_videos/
                    split_0/
                        xxx.mp4
                        yyy.mp4
                    split_1/
                        xxx.mp4
                        yyy.mp4

    After running, clips are written per split, e.g.::

        data/
            your_dataset/
                raw_videos/
                    split_0/
                        xxx.mp4
                        yyy.mp4
                    split_1/
                        xxx.mp4
                        yyy.mp4
                clips/
                    split_0/
                        xxx_scene-0.mp4
                        yyy_scene-0.mp4
                    split_1/
                        xxx_scene-0.mp4
                        yyy_scene-0.mp4
                        yyy_scene-1.mp4
    """
    # TODO: specify your dataset root
    src_root = "./data/your_dataset/raw_videos"
    dst_root = "./data/your_dataset/clips"

    # TODO: specify your splits
    splits = ["split_0", "split_1"]

    # one (source folder, destination folder) job per split
    jobs = [
        (os.path.join(src_root, name), os.path.join(dst_root, name))
        for name in splits
    ]

    # one worker process per split; each worker runs process_folder on its pair
    with Pool(processes=len(splits)) as pool:
        pool.starmap(process_folder, jobs)
if __name__ == "__main__":
    # TODO: choose single process or multiprocessing.
    # The single-process pipeline runs by default; call scene_detect_mp()
    # here instead to process the configured splits in parallel.
    scene_detect()
|