aero-recognize / app.py
chiyoi's picture
Tidy
6cd04e1
raw history blame
No virus
3.16 kB
import tempfile
import cv2
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
from configuration import Config
from model import load_classifier, load_detector
from inference import format_frame, detect_object, classify_action, draw_boxes, draw_classes
# Module-level state shared by the request handler `fn` below: the project
# configuration plus the two models, loaded once at import time so each
# request reuses them.
config = Config()
print(f'TensorFlow {tf.__version__}')
print(f'Load classifier from {config.classifier_path}')
classifier = load_classifier(config)
# The app only runs inference with the classifier, so mark it non-trainable.
classifier.trainable = False
# Log the model summary at startup (presumably a Keras model -- confirm in model.py).
classifier.summary()
print('Load detector.')
detector = load_detector(config)
def fn(video: str, actions: list[int]):
    """Annotate a video with aircraft detections and/or action labels.

    This is the Gradio handler. It is a generator so the UI can show progress:
    it yields ``(preview, video)`` pairs, one per output component.

    * While processing, every ``config.yield_frame_steps``-th frame is yielded
      as ``(path_to_low_quality_jpeg, None)``.
    * When processing finishes, ``(last_processed_frame, output_mp4_path)`` is
      yielded once.
    * If no task is selected, no video was supplied, or the clip decodes to no
      frames, ``(None, video)`` is yielded once and the input is returned
      unchanged.

    Args:
        video: Path of the uploaded video; it is handed directly to
            ``VideoFileClip``. May be ``None`` when nothing was uploaded
            (assumption about Gradio's behaviour -- confirm for the pinned
            version).
        actions: Indices of the selected tasks from the checkbox group:
            ``0`` enables object detection, ``1`` enables action
            classification.

    Yields:
        Tuples ``(preview, video)`` as described above.
    """
    print('Process video.')
    do_detect = 0 in actions
    do_classify = 1 in actions
    if video is None or not (do_detect or do_classify):
        # Nothing to process: hand the input straight back as the "output".
        yield None, video
        return

    # Previews are only progress indicators, so encode them very small.
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 9]

    clip = VideoFileClip(video)
    try:
        processed_frames = []
        window = []             # formatted frames awaiting classification
        labels = []             # most recent classification result
        detections = ([], [])   # most recent detection result
        for i, frame in enumerate(clip.iter_frames()):
            if do_classify and i % config.classify_action_frame_step == 0:
                window.append(format_frame(frame, config))
                if len(window) == config.classify_action_num_frames:
                    print(f'Classify action: Until frame {i}')
                    labels = classify_action(classifier, window, config.id_to_name)
                    window = []

            if do_detect:
                # Detection runs on a subset of frames; frames in between
                # reuse the latest boxes.
                if i % config.detect_object_frame_step == 0:
                    print(f'Detect object: Frame {i}')
                    detections = detect_object(detector, frame)
                frame = draw_boxes(frame, detections, labels, do_classify)
            elif len(labels) > 0:
                # len() rather than truthiness: the container type returned by
                # classify_action is not visible from here.
                frame = draw_classes(frame, labels)
            processed_frames.append(frame)

            if i % config.yield_frame_steps == 0:
                bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                _, encoded = cv2.imencode('.jpg', bgr, jpeg_params)
                with tempfile.NamedTemporaryFile(suffix='.jpeg') as preview:
                    preview.write(encoded.tobytes())
                    # The file is read by name while this generator is
                    # suspended; without a flush the bytes may still sit in
                    # the write buffer and the reader sees a truncated image.
                    preview.flush()
                    yield preview.name, None

        if not processed_frames:
            # Clip decoded to zero frames: there is nothing to encode.
            yield None, video
            return

        # Reserve an output path and close the handle before ffmpeg writes to
        # it. delete=False because the file is consumed after we return.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
            output = f.name
        processed_clip = ImageSequenceClip(processed_frames, clip.fps)
        processed_clip.audio = clip.audio
        processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
    finally:
        # Release the ffmpeg reader(s), including on errors or when the
        # consumer stops iterating early.
        clip.close()

    yield processed_frames[-1], output
# --- Gradio UI wiring -------------------------------------------------------
# Labels are user-facing (Chinese) strings and are kept verbatim.

# Uploaded clip to process; label reads "input video clip".
video_input = gr.Video(
    label='输入视频片段',
    sources=['upload'],
)

# Task selector; label reads "tasks to run". With type='index' the handler
# receives the positions of the ticked boxes: 0 = aircraft detection,
# 1 = aircraft action recognition (this is what `fn` tests for).
task_selector = gr.CheckboxGroup(
    ['飞机检测', '飞机行为识别'],
    type='index',
    label='执行任务',
    info='可以选择仅执行飞机检测任务或仅执行飞机行为识别任务作为演示。',
)

inputs = [video_input, task_selector]

# Outputs, in the order `fn` yields them: preview image ("latest processed
# frame"), then the finished video ("output video clip").
preview_output = gr.Image(label='最新处理的视频帧', interactive=False)
video_output = gr.Video(label='输出视频片段', interactive=False)

outputs = [preview_output, video_output]

# Each example supplies only the video; the task checkboxes are left to the user.
examples = [
    ['examples/ZFLFDfovqls_001310_001320.mp4'], # cspell: disable-line
    ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4'],
]

iface = gr.Interface(
    fn=fn,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    cache_examples=False,
    title='Aeroplane Position and Action Detection',
    description='Detect aeroplane position and action in a video.',
    theme='soft',
)

iface.launch()