"""Gradio demo: streaming aircraft detection and action recognition on video clips."""

import tempfile
import time

import cv2
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

from configuration import Config
from model import load_classifier, load_detector
from inference import format_frame, detect_object, classify_action, draw_boxes, draw_classes

config = Config()

print(f'TensorFlow {tf.__version__}')

print(f'Load classifier from {config.classifier_path}')
classifier = load_classifier(config)
classifier.trainable = False
classifier.summary()

print('Load detector.')
detector = load_detector(config)


def fn(video: str | None, tasks: list[int]):
    # Clear both outputs before processing starts.
    yield None, None
    if video is None:
        return
    print('Process video.')
    do_detect = 0 in tasks
    do_classify = 1 in tasks
    first_yield = True
    if not do_detect and not do_classify:
        # No task selected: return the input clip unchanged.
        yield None, video
        return
    # Reserve a named output file; delete=False keeps it after the handle closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
        output = f.name
    clip = VideoFileClip(video)
    processed_frames = []
    frames = []             # Buffer of formatted frames awaiting classification.
    actions = []            # Most recent action-classification results.
    detections = ([], [])   # Most recent detection results.
    for i, frame in enumerate(clip.iter_frames()):
        if do_classify:
            if i % config.classify_action_frame_step == 0:
                frames.append(format_frame(frame, config))
            if len(frames) == config.classify_action_num_frames:
                print(f'Classify action: Until frame {i}')
                actions = classify_action(classifier, frames, config.id_to_name)
                frames = []
        if do_detect and i % config.detect_object_frame_step == 0:
            print(f'Detect object: Frame {i}')
            detections = detect_object(detector, frame)
        if do_detect:
            frame = draw_boxes(frame, detections, actions, do_classify)
        elif len(actions) > 0:
            frame = draw_classes(frame, actions)
        processed_frames.append(frame)
        if i % config.yield_frame_steps == 0:
            print(f'Yield latest frame: Frame {i}')
            # Low JPEG quality (scale 0-100) keeps the streamed preview small.
            quality = 9
            # MoviePy yields RGB frames; OpenCV expects BGR.
            image_array = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            _, image_encoded = cv2.imencode('.jpg', image_array, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
            with tempfile.NamedTemporaryFile(suffix='.jpeg') as f:
                f.write(image_encoded)
                f.flush()  # Ensure the bytes are on disk before Gradio reads the path.
                yield f.name, None
                # Brief pauses keep the preview file alive long enough for the
                # frontend to fetch it and throttle the update rate.
                if first_yield:
                    time.sleep(0.5)
                    first_yield = False
                if i % config.classify_action_frame_step != 0 and i % config.detect_object_frame_step != 0:
                    time.sleep(0.1)
    processed_clip = ImageSequenceClip(processed_frames, fps=clip.fps)
    processed_clip.audio = clip.audio
    processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
    yield frame, output


inputs = [
    gr.Video(sources=['upload'], label='Input video clip'),
    gr.CheckboxGroup(
        ['Aircraft detection', 'Aircraft action recognition'],
        label='Tasks',
        info='For this demo you may also run only the detection task or only the action-recognition task.',
        type='index')]

outputs = [
    gr.Image(interactive=False, label='Latest processed frame'),
    gr.Video(interactive=False, label='Output video clip')]

examples = [
    ['examples/ZFLFDfovqls_001310_001320.mp4'],  # cspell: disable-line
    ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4']]

iface = gr.Interface(
    title='Deep-Learning-Based Aircraft Detection and Action Recognition for Video Streams',
    description=('Upload a video clip. After you submit, the latest processed frame is '
                 'streamed back in real time; once processing finishes, the annotated '
                 'clip is available for download.'),
    theme='soft',
    fn=fn,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    cache_examples=False)

iface.launch()