aero-recognize / app.py
chiyoi's picture
fix
b0bdee4
raw
history blame
No virus
2.3 kB
import tempfile
import numpy as np
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
from ultralytics import YOLO
from configurations import *
from core.data import format_frame
from core.model import load_classifier
from core.inference import detect_object, classify_action, draw_boxes
# Weight files for the two models used by the request handler.
classifier_path = 'weights/classifier-8-epoch10.keras'
detector_path = 'weights/yolov8n.pt'

print(f"Tensorflow version {tf.__version__}")

# Both models are built once at module import and reused by every call.
print('Load classifier.')
classifier = load_classifier(classifier_path)

print('Load detector.')
detector = YOLO(detector_path)
def fn(video: gr.Video):
    """Annotate an uploaded video and stream progress back to the UI.

    This is a generator. While frames are being processed it yields
    ``(annotated_frame, None)`` once per input frame; after the annotated
    clip has been encoded it yields ``(last_annotated_frame, output_path)``
    exactly once.

    Args:
        video: Value delivered by the video input component. It is handed
            straight to ``VideoFileClip`` (presumably a file path -- confirm
            against the Gradio version in use).

    Yields:
        A 2-tuple ``(frame, path)`` where ``frame`` is the value returned by
        ``draw_boxes`` for the most recent frame and ``path`` is ``None``
        until the final item, where it is the path of the written ``.mp4``.

    Raises:
        gr.Error: If no video was supplied or the video contains no frames.
    """
    if video is None:
        raise gr.Error('Please upload a video first.')

    print('Process video.')
    clip = VideoFileClip(video)
    try:
        processed_frames = []
        # Formatted frames collected for the next action classification.
        frames = []
        # Placeholders drawn until the first classification / detection
        # results are available.
        actions = []
        detections = ([], [])
        annotated = None

        for i, frame in enumerate(clip.iter_frames()):
            if i % classify_action_frame_steps == 0:
                frames.append(format_frame(frame))
            if i % detect_object_frame_steps == 0:
                print(f'Detect object: Frame {i}')
                detections = detect_object(detector, frame)
            if len(frames) == classify_action_num_frames:
                print(f'Classify action: Until frame {i}')
                actions = classify_action(classifier, frames)
                frames = []
            # Keep the annotated image under its own name so the value used
            # by the final yield is always defined and unambiguous.
            annotated = draw_boxes(frame, detections, actions)
            processed_frames.append(annotated)
            yield annotated, None

        if not processed_frames:
            # Nothing to encode; building a clip from zero frames would fail
            # with an unrelated low-level error.
            raise gr.Error('The uploaded video contains no frames.')

        # Reserve the output path only once there is something to write, so
        # an aborted or failed run does not leave an orphaned temp file.
        # The handle is closed right away; only the name is needed.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
            output = f.name

        processed_clip = ImageSequenceClip(processed_frames, clip.fps)
        processed_clip.audio = clip.audio
        processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
    finally:
        # Release the source clip's reader(s) whether processing finished,
        # failed, or the consumer stopped iterating early.
        clip.close()

    yield annotated, output
# Single input: a video that can only be provided by upload.
inputs = gr.Video(label='Input Video', sources=['upload'])

# Two read-only outputs, in the same order as the pairs yielded by `fn`.
outputs = [
    gr.Image(label='Last Frame Processed', interactive=False),
    gr.Video(label='Aeroplane Position and Action Marked', interactive=False),
]

# Bundled sample clips offered in the UI.
examples = [
    ['examples/ZFLFDfovqls_001310_001320.mp4'],  # cspell: disable-line
    ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4'],
]

iface = gr.Interface(
    fn=fn,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    cache_examples=False,
    title='Aeroplane Position and Action Detection',
    description='Detect aeroplane position and action in a video.',
    theme='soft',
)

iface.launch()