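# Gradio demo: mark aeroplane positions and actions in an uploaded video,
# streaming the most recently annotated frame while the full clip is processed.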
import tempfile
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

from configuration import Config
from model import load_classifier, load_detector
from inference import format_frame, detect_object, classify_action, draw_boxes

config = Config()
print(f'TensorFlow {tf.__version__}')

print(f'Load classifier from {config.classifier_path}')
classifier = load_classifier(config)
classifier.summary()

print('Load detector.')
detector = load_detector(config)

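# Generator handler: Gradio renders each yielded (frame, video) pair as it
# arrives, so progress frames appear before the processed file is ready.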
def fn(video: str):  # Gradio's Video input passes the upload as a file path
  print('Process video.')
  # delete=False keeps the file on disk after the with-block so Gradio can serve it.
  with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
    output = f.name
    clip = VideoFileClip(video)
    processed_frames = []
    frames = []  # formatted frames buffered for the next action classification
    actions = []  # latest action labels, reused until the next classification
    detections = ([], [])  # empty placeholder until the first detector pass
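    # Two independent strides: every classify_action_frame_steps-th frame is
    # buffered for the action classifier, and the object detector re-runs every
    # detect_object_frame_steps frames; frames in between reuse the latest results.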
    for i, frame in enumerate(clip.iter_frames()):
      if i % config.classify_action_frame_steps == 0:
        frames.append(format_frame(frame, config))
      if i % config.detect_object_frame_steps == 0:
        print(f'Detect object: Frame {i}')
        detections = detect_object(detector, frame)
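      # Once enough frames are buffered, classify the action over that window
      # and reset the buffer.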
      if len(frames) == config.classify_action_num_frames:
        print(f'Classify action: Until frame {i}')
        actions = classify_action(classifier, frames, config.id_to_name)
        frames = []
      frame = draw_boxes(frame, detections, actions)
      processed_frames.append(frame)
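      # Throttle UI updates: only stream every yield_frame_steps-th frame.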
      if i % config.yield_frame_steps == 0:
        yield frame, None
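    # Rebuild the clip from the annotated frames at the source frame rate and
    # carry over the original audio track.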
    processed_clip = ImageSequenceClip(processed_frames, clip.fps)
    processed_clip.audio = clip.audio
    processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
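  # Final yield: the last annotated frame plus the finished video file.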
  yield frame, output

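# I/O components; the two outputs match the (frame, video) pairs fn yields.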
inputs = gr.Video(sources=['upload'], label='Input Video')
outputs = [
  gr.Image(interactive=False, label='Last Frame Processed'),
  gr.Video(interactive=False, label='Aeroplane Position and Action Marked')]

examples = [
  ['examples/ZFLFDfovqls_001310_001320.mp4'], # cspell: disable-line
  ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4']]

iface = gr.Interface(
  title='Aeroplane Position and Action Detection',
  description='Detect aeroplane position and action in a video.',
  theme='soft',
  fn=fn,
  inputs=inputs,
  outputs=outputs,
  examples=examples,
  cache_examples=False)
iface.launch()