# aero-recognize / app.py
import tempfile
import cv2
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
from configuration import Config
from model import load_classifier, load_detector
from inference import format_frame, detect_object, classify_action, draw_boxes
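# Load the configuration and both models once at module import time so every
# Gradio request reuses the same classifier and detector instances.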
config = Config()
print(f'TensorFlow {tf.__version__}')
print(f'Load classifier from {config.classifier_path}')
classifier = load_classifier(config)
classifier.summary()
print('Load detector.')
detector = load_detector(config)
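# `fn` is a generator: while the video is being processed it streams
# (preview image path, None) tuples so the UI can show progress, and its final
# yield returns (last annotated frame, path to the annotated output video).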
def fn(video: gr.Video):
    print('Process video.')
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
        output = f.name
    clip = VideoFileClip(video)
    processed_frames = []
    frames = []
    actions = []
    detections = ([], [])
    for i, frame in enumerate(clip.iter_frames()):
        # Buffer every Nth frame for action classification.
        if i % config.classify_action_frame_steps == 0:
            frames.append(format_frame(frame, config))
        # Refresh object detections every Nth frame.
        if i % config.detect_object_frame_steps == 0:
            print(f'Detect object: Frame {i}')
            detections = detect_object(detector, frame)
        # Once enough frames are buffered, classify the action and reset the buffer.
        if len(frames) == config.classify_action_num_frames:
            print(f'Classify action: Until frame {i}')
            actions = classify_action(classifier, frames, config.id_to_name)
            frames = []
        # Annotate the current frame with the latest detections and actions.
        frame = draw_boxes(frame, detections, actions)
        processed_frames.append(frame)
        # Periodically stream a low-quality JPEG preview of the latest annotated frame.
        if i % config.yield_frame_steps == 0:
            with tempfile.NamedTemporaryFile(suffix='.jpeg') as f:
                # moviepy yields RGB frames while OpenCV's encoder expects BGR,
                # so convert a copy for the preview only.
                preview = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                quality = 1
                _, img_encoded = cv2.imencode('.jpg', preview, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
                f.write(img_encoded)
                yield f.name, None
    # Assemble the annotated frames into a video, keeping the original audio track.
    processed_clip = ImageSequenceClip(processed_frames, clip.fps)
    processed_clip.audio = clip.audio
    processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
    yield frame, output
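# Gradio interface wiring: the image output shows the streamed previews of the
# last processed frame; the video output receives the finished annotated clip.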
inputs = gr.Video(sources=['upload'], label='Input Video')
outputs = [
    gr.Image(interactive=False, label='Last Frame Processed'),
    gr.Video(interactive=False, label='Aeroplane Position and Action Marked')]
examples = [
    ['examples/ZFLFDfovqls_001310_001320.mp4'],  # cspell: disable-line
    ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4']]
iface = gr.Interface(
    title='Aeroplane Position and Action Detection',
    description='Detect aeroplane position and action in a video.',
    theme='soft',
    fn=fn,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    cache_examples=False)
iface.launch()