import os
import subprocess
import tempfile

import cv2
import numpy as np
import tqdm
import gradio as gr

# Cap the number of frames processed per request so the demo stays responsive.
TOTAL_FRAMES = 60

# Install VNext (a detectron2 fork) at startup and clone the repo so the
# config files under VNext/configs are available on disk. This must run
# before the detectron2 imports below.
subprocess.run(["pip", "install", "git+https://github.com/wjf5203/VNext.git"], check=True)
subprocess.run(["git", "clone", "https://github.com/wjf5203/VNext"], check=True)

from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine.defaults import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode


def test_opencv_video_format(codec, file_ext):
    """Return True if this OpenCV build can write video with the given codec."""
    with tempfile.TemporaryDirectory(prefix="video_format_test") as tmpdir:
        filename = os.path.join(tmpdir, "test_file" + file_ext)
        writer = cv2.VideoWriter(
            filename=filename,
            fourcc=cv2.VideoWriter_fourcc(*codec),
            fps=30.0,
            frameSize=(10, 10),
            isColor=True,
        )
        for _ in range(30):
            writer.write(np.zeros((10, 10, 3), np.uint8))
        writer.release()
        return os.path.isfile(filename)


def setup_cfg():
    # Load config from file and set score thresholds for the builtin models.
    cfg = get_cfg()
    # To use the demo for Panoptic-DeepLab, uncomment the following two lines:
    # from detectron2.projects.panoptic_deeplab import add_panoptic_deeplab_config  # noqa
    # add_panoptic_deeplab_config(cfg)
    cfg.merge_from_file("VNext/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml")
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = 0.5
    cfg.freeze()
    return cfg


predictor = DefaultPredictor(setup_cfg())
metadata = MetadataCatalog.get("__unused")


def run_on_video(video, total_frames):
    """Yield up to `total_frames` visualized frames from an open cv2.VideoCapture."""
    video_visualizer = VideoVisualizer(metadata, ColorMode.IMAGE)

    def _frame_from_video(video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def process_predictions(frame, predictions):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                frame, panoptic_seg.to("cpu"), segments_info
            )
        elif "instances" in predictions:
            predictions = predictions["instances"].to("cpu")
            vis_frame = video_visualizer.draw_instance_predictions(frame, predictions)
        elif "sem_seg" in predictions:
            vis_frame = video_visualizer.draw_sem_seg(
                frame, predictions["sem_seg"].argmax(dim=0).to("cpu")
            )
        # Convert Matplotlib RGB format back to OpenCV BGR format.
        return cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)

    for i, frame in enumerate(_frame_from_video(video)):
        if i >= total_frames:
            return
        yield process_predictions(frame, predictor(frame))


def inference(video_path):
    video = cv2.VideoCapture(video_path)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frames_per_second = video.get(cv2.CAP_PROP_FPS)
    num_frames = min(int(video.get(cv2.CAP_PROP_FRAME_COUNT)), TOTAL_FRAMES)

    # Prefer x264 in an .mkv container if this OpenCV build supports it,
    # otherwise fall back to mp4v; the output extension must match the codec.
    codec, file_ext = (
        ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4")
    )
    output_fname = "result" + file_ext
    output_file = cv2.VideoWriter(
        filename=output_fname,
        fourcc=cv2.VideoWriter_fourcc(*codec),
        fps=float(frames_per_second),
        frameSize=(width, height),
        isColor=True,
    )
    for vis_frame in tqdm.tqdm(run_on_video(video, num_frames), total=num_frames):
        output_file.write(vis_frame)
    video.release()
    output_file.release()

    # Re-encode with ffmpeg to an H.264 mp4 so the result plays in the browser.
    out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    subprocess.run(
        f"ffmpeg -y -loglevel quiet -stats -i {output_fname} -c:v libx264 {out_file.name}".split(),
        check=True,
    )
    return out_file.name


video_interface = gr.Interface(
    fn=inference,
    inputs=gr.Video(),
    outputs=gr.Video(format="mp4"),
    examples=[
        ["inps.mp4"],
        ["example_3.mp4"],
    ],
    allow_flagging="never",
    title="VNext",
    description="Demo for wjf5203/VNext",
)
video_interface.launch(debug=True)
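
# Usage note: running this file directly (e.g. `python app.py`, assuming that
# is the filename) starts the Gradio UI via launch(debug=True) above. The
# `examples` list assumes inps.mp4 and example_3.mp4 sit next to the script,
# and ffmpeg must be on PATH for the final H.264 re-encode in inference().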