import tempfile
import cv2
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

from configuration import Config
from model import load_classifier, load_detector
from inference import format_frame, detect_object, classify_action, draw_boxes, draw_classes

config = Config()
print(f'TensorFlow {tf.__version__}')

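# Load both models once at import time so every request reuses the same weights.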
print(f'Load classifier from {config.classifier_path}')
classifier = load_classifier(config)
classifier.trainable = False
classifier.summary()

print('Load detector.')
detector = load_detector(config)

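# Gradio streaming callback: `video` is the path of the uploaded clip and
# `actions` holds the indices of the selected tasks. Intermediate yields
# stream a preview frame; the final yield also delivers the processed video.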
def fn(video: str, actions: list[int]):
  print('Process video.')
  do_detect = 0 in actions
  do_classify = 1 in actions
  if not do_detect and not do_classify:
    yield None, video
    return
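  # Render the result into a named temporary file (delete=False keeps the file
  # on disk after the handle closes) so Gradio can serve it back to the client.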
  with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
    output = f.name
    clip = VideoFileClip(video)
    processed_frames = []
    frames = []
    action_labels = []  # classification results; do not shadow the `actions` argument
    detections = ([], [])
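    # Detection and classification each run on their own frame stride; the most
    # recent results persist and are drawn onto every frame in between.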
    for i, frame in enumerate(clip.iter_frames()):
      if do_classify and i % config.classify_action_frame_step == 0:
        frames.append(format_frame(frame, config))
      if do_detect and i % config.detect_object_frame_step == 0:
        print(f'Detect object: Frame {i}')
        detections = detect_object(detector, frame)
      if do_classify and len(frames) == config.classify_action_num_frames:
        print(f'Classify action: Until frame {i}')
        action_labels = classify_action(classifier, frames, config.id_to_name)
        frames = []
      if do_detect:
        frame = draw_boxes(frame, detections, action_labels, do_classify)
      else:
        frame = draw_classes(frame, action_labels)
      processed_frames.append(frame)
      if i % config.yield_frame_steps == 0:
        # Stream a low-quality JPEG of the frame just processed as a preview.
        quality = 9
        image_array = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        _, image_encoded = cv2.imencode('.jpg', image_array, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
        with tempfile.NamedTemporaryFile(suffix='.jpeg') as preview:
          preview.write(image_encoded)
          preview.flush()  # make sure the bytes reach disk before Gradio reads the file
          yield preview.name, None
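    # Re-assemble the processed frames into a clip, keep the original audio
    # track, and encode the final video.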
    processed_clip = ImageSequenceClip(processed_frames, clip.fps)
    processed_clip.audio = clip.audio
    processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
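  # Final yield: the last processed frame plus the path of the encoded video.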
  yield frame, output

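# UI definition: one uploaded video in; a live preview frame and the final
# processed clip out.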
inputs = [
  gr.Video(sources=['upload'], label='Input video clip'),
  gr.CheckboxGroup(
    ['Aeroplane detection', 'Aeroplane action recognition'],
    label='Tasks',
    info='As a demo you may run only the aeroplane detection task or only the aeroplane action recognition task.',
    type='index')]
outputs = [
  gr.Image(interactive=False, label='Latest processed frame'),
  gr.Video(interactive=False, label='Output video clip')]

examples = [
  ['examples/ZFLFDfovqls_001310_001320.mp4'], # cspell: disable-line
  ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4']]

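# cache_examples=False: the sample clips are processed on demand instead of
# being precomputed when the app starts.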
iface = gr.Interface(
  title='Aeroplane Position and Action Detection',
  description='Detect aeroplane position and action in a video.',
  theme='soft',
  fn=fn,
  inputs=inputs,
  outputs=outputs,
  examples=examples,
  cache_examples=False)
iface.launch()