# Gradio Space app: aeroplane position and action detection.
import tempfile
import numpy as np
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
from ultralytics import YOLO
from configurations import *
from core.data import format_frame
from core.model import load_classifier
from core.inference import detect_object, classify_action, draw_boxes
# --- Module-level setup: load models once at startup (prints + weight loading) ---
print("Tensorflow version " + tf.__version__)
print('Load classifier.')
# Action-classifier checkpoint (Keras format); architecture comes from core.model.
classifier_path = 'weights/classifier-8-epoch10.keras'
classifier = load_classifier(classifier_path)
print('Load detector.')
# YOLOv8-nano weights used for aeroplane (object) detection.
detector_path = 'weights/yolov8n.pt'
detector = YOLO(detector_path)
def fn(video: gr.Video):
    """Detect aeroplane positions and classify actions in an uploaded video.

    Generator used by Gradio streaming: yields ``(annotated_frame, None)``
    for every frame while processing, then a final ``(last_frame, path)``
    once the annotated video has been written to a temporary mp4 file.

    Args:
        video: Path to the uploaded video file (Gradio passes a filepath).

    Yields:
        Tuples of (latest annotated frame as an ndarray, output path or None).
    """
    print('Process video.')
    # delete=False so the file outlives this handle; Gradio serves it afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
        output = f.name
    clip = VideoFileClip(video)
    try:
        processed_frames = []
        frames = []            # buffered formatted frames awaiting classification
        actions = []           # most recent action predictions
        detections = ([], [])  # most recent detection result (empty until first run)
        frame = None
        for i, frame in enumerate(clip.iter_frames()):
            # Sample frames for the action classifier at a fixed stride.
            if i % classify_action_frame_steps == 0:
                frames.append(format_frame(frame))
            # Run the object detector at its own stride.
            if i % detect_object_frame_steps == 0:
                print(f'Detect object: Frame {i}')
                detections = detect_object(detector, frame)
            # Once a full window is buffered, classify the action and reset.
            if len(frames) == classify_action_num_frames:
                print(f'Classify action: Until frame {i}')
                actions = classify_action(classifier, frames)
                frames = []
            # Annotate with the latest detections/actions and stream the frame.
            frame = draw_boxes(frame, detections, actions)
            processed_frames.append(frame)
            yield frame, None
        if not processed_frames:
            # Empty or unreadable video: previously this path raised NameError
            # on `frame` and ImageSequenceClip would fail on an empty list.
            yield None, None
            return
        processed_clip = ImageSequenceClip(processed_frames, clip.fps)
        processed_clip.audio = clip.audio
        processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
        yield frame, output
    finally:
        # Release moviepy's ffmpeg reader resources (was leaked before).
        clip.close()
# Gradio UI wiring: one uploaded video in; streamed last-frame image plus the
# final annotated video out.
demo = gr.Interface(
    fn=fn,
    inputs=gr.Video(sources=['upload'], label='Input Video'),
    outputs=[
        gr.Image(interactive=False, label='Last Frame Processed'),
        gr.Video(interactive=False, label='Aeroplane Position and Action Marked'),
    ],
    examples=[
        ['examples/ZFLFDfovqls_001310_001320.mp4'],  # cspell: disable-line
        ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4'],
    ],
    title='Aeroplane Position and Action Detection',
    description='Detect aeroplane position and action in a video.',
    theme='soft',
    cache_examples=False)
demo.launch()