update
- app.py +13 -17
- configuration.py +32 -0
- configurations.py +0 -48
- core/data.py +0 -71
- core/model.py +0 -38
- core/inference.py → inference.py +11 -8
- model.py +54 -0
- requirements.txt +2 -2
- weights/classifier-7.keras +0 -3
app.py
CHANGED
@@ -1,24 +1,20 @@
 import tempfile
-
 import gradio as gr
 import tensorflow as tf
 from moviepy.editor import VideoFileClip
 from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
-from ultralytics import YOLO
 
-from configurations import *
-from core.model import load_classifier
-from core.inference import format_frame, detect_object, classify_action, draw_boxes
-
-print(
+from configuration import Config
+from model import load_classifier, load_detector
+from inference import format_frame, detect_object, classify_action, draw_boxes
+config = Config()
+print(f'TensorFlow {tf.__version__}')
 
-print('Load classifier.')
-
-classifier = load_classifier(classifier_path)
+print(f'Load classifier from {config.classifier_path}')
+classifier = load_classifier(config)
 
 print('Load detector.')
-
-detector = YOLO(detector_path)
+detector = load_detector(config)
 
 def fn(video: gr.Video):
     print('Process video.')
@@ -30,18 +26,18 @@ def fn(video: gr.Video):
     actions = []
     detections = ([], [])
     for i, frame in enumerate(clip.iter_frames()):
-        if i % classify_action_frame_steps == 0:
-            frames.append(format_frame(frame))
-        if i % detect_object_frame_steps == 0:
+        if i % config.classify_action_frame_steps == 0:
+            frames.append(format_frame(frame, config))
+        if i % config.detect_object_frame_steps == 0:
             print(f'Detect object: Frame {i}')
             detections = detect_object(detector, frame)
-        if len(frames) == classify_action_num_frames:
+        if len(frames) == config.classify_action_num_frames:
             print(f'Classify action: Until frame {i}')
-            actions = classify_action(classifier, frames)
+            actions = classify_action(classifier, frames, config.id_to_name)
             frames = []
         frame = draw_boxes(frame, detections, actions)
         processed_frames.append(frame)
-        if i % yield_frame_steps == 0:
+        if i % config.yield_frame_steps == 0:
            yield frame, None
     processed_clip = ImageSequenceClip(processed_frames, clip.fps)
     processed_clip.audio = clip.audio
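The hunks end before app.py's Gradio wiring, but `fn` is a generator that yields `(frame, None)` pairs, which Gradio streams to its outputs as they arrive. A plausible hookup, purely illustrative: none of these component names or labels appear in the commit.

    # Hypothetical wiring (not shown in this diff): the generator streams
    # annotated frames to an Image output while the finished clip, presumably
    # yielded after the loop, fills the Video output.
    demo = gr.Interface(
        fn=fn,
        inputs=gr.Video(),
        outputs=[gr.Image(label='Live preview'), gr.Video(label='Processed video')],
    )
    demo.launch()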
configuration.py
ADDED
@@ -0,0 +1,32 @@
+class Config:
+    num_frames = 8
+    frame_step = 15
+    resolution = 224
+    frame_size = (resolution, resolution)
+    id_to_name = {
+        0: 'Flying',
+        1: 'Landing',
+        2: 'Other',
+        3: 'Straight Taxiing',
+        4: 'Takeoff',
+        5: 'Turning Maneuver',
+    }
+    name_to_id = {
+        'Flying': 0,
+        'Landing': 1,
+        'Other': 2,
+        'Straight Taxiing': 3,
+        'Takeoff': 4,
+        'Turning Maneuver': 5,
+    }
+
+    model_id = 'a0'
+    detector_path = 'weights/yolov8n.pt'
+    classifier_path = 'weights/classifier-8-epoch10.keras'
+    num_classes = len(id_to_name)
+    input_shape = (1, num_frames, resolution, resolution, 3)
+
+    detect_object_frame_steps = 5
+    classify_action_frame_steps = 15
+    classify_action_num_frames = 8
+    yield_frame_steps = 10
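Everything on `Config` lives in the class body, so derived values such as `num_classes` and `input_shape` are computed once at import time and shared by every instance. A quick check of what callers see, using only values from the file above:

    from configuration import Config

    config = Config()
    print(config.frame_size)    # (224, 224)
    print(config.num_classes)   # 6, derived from id_to_name
    print(config.input_shape)   # (1, 8, 224, 224, 3)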
configurations.py
DELETED
@@ -1,48 +0,0 @@
-# Data
-data_dir = 'storage/dataset'
-training_ratio = 0.7
-validation_ratio = 0.02
-num_frames = 8
-frame_step = 1
-resolution = 224
-frame_size = (resolution, resolution)
-
-id_to_name = {
-    0: 'Flying',
-    1: 'Landing',
-    2: 'Other',
-    3: 'Straight Taxiing',
-    4: 'Takeoff',
-    5: 'Turning Maneuver',
-}
-
-name_to_id = {
-    'Flying': 0,
-    'Landing': 1,
-    'Other': 2,
-    'Straight Taxiing': 3,
-    'Takeoff': 4,
-    'Turning Maneuver': 5,
-}
-
-# Model
-model_id = 'a0'
-checkpoint_dir = f'storage/pretrained_weights/movinet_{model_id}_base'
-num_classes = 6
-
-# Inference
-detect_object_frame_steps = 10
-classify_action_frame_steps = 15
-classify_action_num_frames = 8
-yield_frame_steps = 10
-
-# Train
-train_id = 8
-batch_size = 16
-learning_rate = 0.001
-epochs = 15
-model_save_path = f'storage/output/classifier-{train_id}.keras'
-log_dir = f'storage/logs/classifier-{train_id}.log'
-
-# Train more
-initial_epoch = 0
core/data.py
DELETED
@@ -1,71 +0,0 @@
-from pathlib import Path
-import random
-from typing import Literal
-import cv2
-import numpy as np
-import tensorflow as tf
-
-from configurations import *
-
-def format_frame(frame):
-    frame = tf.image.convert_image_dtype(frame, tf.float32)
-    frame = tf.image.resize_with_pad(frame, *frame_size)
-    return frame
-
-def pick_frames(video: str):
-    capture = cv2.VideoCapture(video)
-    if not capture.isOpened(): raise ValueError('Video file could not be opened.')
-    total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
-    need_frames = 1 + (num_frames - 1) * frame_step
-    if need_frames <= total_frames:
-        start = random.randint(0, total_frames - need_frames + 1)
-        capture.set(cv2.CAP_PROP_POS_FRAMES, start)
-    frames = []
-    for _ in range(num_frames):
-        for _ in range(frame_step):
-            ok, frame = capture.read()
-        if ok: frames.append(format_frame(frame))
-        else: frames.append(np.zeros(frame_size + (3,)))
-    capture.release()
-    frames = np.array(frames)
-    frames = frames[..., [2, 1, 0]]
-    return frames
-
-def Data():
-    data_dir_path = Path(data_dir)
-    return {
-        'training': {
-            a.name: (
-                lambda ps: ps[
-                    :int(len(ps) * training_ratio)])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-        'validation': {
-            a.name: (
-                lambda ps: ps[
-                    int(len(ps) * training_ratio):
-                    int(len(ps) * (training_ratio + validation_ratio))])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-        'testing': {
-            a.name: (
-                lambda ps: ps[
-                    int(len(ps) * (training_ratio + validation_ratio)):])(
-                [x for x in a.iterdir()])
-            for a in data_dir_path.iterdir()},
-    }
-
-def FrameGenerator(split: Literal['training', 'validation']):
-    data = Data()
-    def generator():
-        pairs = [
-            (str(video), class_name)
-            for class_name, videos in data[split].items()
-            for video in videos
-        ]
-        random.shuffle(pairs)
-        for video, class_name in pairs:
-            frames = pick_frames(video)
-            label = name_to_id[class_name]
-            yield frames, label
-    return generator
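For reference, the deleted `FrameGenerator` returns a zero-argument generator yielding `(frames, label)` pairs, which is exactly the shape `tf.data.Dataset.from_generator` expects. The removed training pipeline presumably consumed it along these lines; this is a sketch of the standard tf.data pattern, not code from the repo:

    import tensorflow as tf

    # Assumed consumer of the deleted FrameGenerator; batch size 16 taken
    # from the deleted configurations.py.
    output_signature = (
        tf.TensorSpec(shape=(None, None, None, 3), dtype=tf.float32),  # frames
        tf.TensorSpec(shape=(), dtype=tf.int16),                       # label
    )
    train_ds = tf.data.Dataset.from_generator(
        FrameGenerator('training'), output_signature=output_signature
    ).batch(16)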
core/model.py
DELETED
@@ -1,38 +0,0 @@
-import tensorflow as tf
-from tensorflow import keras
-from official.projects.movinet.modeling import movinet
-from official.projects.movinet.modeling import movinet_model
-
-from configurations import *
-
-def load_backbone():
-    return movinet.Movinet()
-
-def build_classifier():
-    backbone = load_backbone()
-    model = movinet_model.MovinetClassifier(
-        backbone=backbone,
-        num_classes=600)
-    checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
-    checkpoint = tf.train.Checkpoint(model=model)
-    status = checkpoint.restore(checkpoint_path)
-    status.assert_existing_objects_matched()
-    model.build([batch_size, num_frames, resolution, resolution, 3])
-    output = keras.layers.Dense(num_classes)
-    return keras.Sequential(layers=[model, output])
-
-def load_classifier(classifier_path):
-    backbone = load_backbone()
-    model = movinet_model.MovinetClassifier(
-        backbone=backbone,
-        num_classes=600)
-    model.build([batch_size, num_frames, resolution, resolution, 3])
-    output = keras.layers.Dense(num_classes)
-    model = keras.Sequential(layers=[model, output])
-    model.load_weights(classifier_path)
-    return model
-
-def compile_classifier(model):
-    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
-    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
-    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
core/inference.py → inference.py
RENAMED
@@ -2,12 +2,16 @@ from imgviz import instances2rgb
 import tensorflow as tf
 import numpy as np
 
-from configurations import *
-from core.data import format_frame
+from configuration import Config
 
 # detections: (classes: list of class_name, boxes: list of [x1, y1, x2, y2])
 # actions: list of f'{action_name}: {confidence}'
 
+def format_frame(frame, config: Config):
+    frame = tf.image.convert_image_dtype(frame, tf.float32)
+    frame = tf.image.resize_with_pad(frame, *config.frame_size)
+    return frame
+
 def detect_object(detector, frame):
     result = detector(frame, classes=4, verbose=False)[0]
     classes = result.boxes.cls.numpy()
@@ -18,10 +22,9 @@ def detect_object(detector, frame):
     )
     return detections
 
-def classify_action(classifier, frames):
+def classify_action(classifier, frames, id_to_name):
     actions = []
     frames = np.array(frames)
-    # frames = frames[..., [2, 1, 0]]
     frames = tf.expand_dims(frames, 0)
     output = classifier(frames)
     confidences = tf.nn.softmax(output).numpy()[0]
@@ -62,7 +65,7 @@ def draw_boxes(frame, detections, actions):
     )
     return frame
 
-def FrameProcessor(detector, classifier):
+def FrameProcessor(detector, classifier, config: Config):
     current_frame = 0
     frames = []
     actions = []
@@ -70,12 +73,12 @@ def FrameProcessor(detector, classifier):
     def process_frame(frame):
         nonlocal current_frame, frames, actions, detections
         current_frame += 1
-        if current_frame % classify_action_frame_steps == 0:
-            frames.append(format_frame(frame))
-        if current_frame % detect_object_frame_steps == 0:
+        if current_frame % config.classify_action_frame_steps == 0:
+            frames.append(format_frame(frame, config))
+        if current_frame % config.detect_object_frame_steps == 0:
             print(f'Detect object: Frame {current_frame}')
             detections = detect_object(detector, frame)
-        if len(frames) == classify_action_num_frames:
+        if len(frames) == config.classify_action_num_frames:
             print(f'Classify action: Until frame {current_frame}')
-            actions = classify_action(classifier, frames)
+            actions = classify_action(classifier, frames, config.id_to_name)
             frames = []
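`FrameProcessor` is a closure factory: it captures the frame counter, the buffer of formatted frames, and the latest detections and actions, so a caller holds one `process_frame` per video and feeds it frames in order. A minimal sketch of the intended call pattern; the tail of `process_frame` falls outside the hunks shown, so the assumption that it returns the annotated frame is exactly that, an assumption:

    # detector, classifier, and config as loaded in app.py.
    process_frame = FrameProcessor(detector, classifier, config)
    for frame in clip.iter_frames():
        # Runs YOLO every detect_object_frame_steps frames and MoViNet once
        # classify_action_num_frames formatted frames have been buffered.
        annotated = process_frame(frame)  # assumed return value, not shown in the diff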
model.py
ADDED
@@ -0,0 +1,54 @@
+import tensorflow as tf
+from tensorflow import keras
+from ultralytics import YOLO
+from official.projects.movinet.modeling.movinet import Movinet
+from official.projects.movinet.modeling.movinet_model import MovinetClassifier
+
+from configuration import Config
+
+class AttentionDenseClassifierHead(keras.layers.Layer):
+    def __init__(self, attention_heads, dense_units, dropout_rate=0.2, **kwargs):
+        super().__init__(**kwargs)
+        self.attention = keras.layers.MultiHeadAttention(num_heads=attention_heads, key_dim=1)
+        self.normalization = keras.layers.LayerNormalization(epsilon=1e-6)
+        self.dropout = keras.layers.Dropout(dropout_rate)
+        self.dense = keras.layers.Dense(dense_units, activation='softmax')
+
+    def call(self, x, training):
+        y = tf.expand_dims(x, -1)
+        y = self.attention(query=y, key=y, value=y)
+        y = tf.squeeze(y, axis=-1)
+        y = self.dropout(y, training=training)
+        y = self.normalization(x + y*0.01)
+        y = self.dense(y)
+        return y
+
+def build_movinet(output_size, config: Config):
+    model = MovinetClassifier(
+        backbone=Movinet(model_id=config.model_id),
+        num_classes=output_size)
+    model.build(config.input_shape)
+    return model
+
+def build_classifier_head(input_size, config: Config):
+    inputs = keras.Input(shape=(input_size,))
+    classifier = AttentionDenseClassifierHead(2, config.num_classes)(inputs)
+    model = keras.Model(inputs=inputs, outputs=classifier)
+    return model
+
+def build_model(movinet, classifier_head):
+    return keras.models.Sequential([movinet, classifier_head])
+
+def load_classifier(config: Config):
+    movinet = build_movinet(600, config)
+    classifier_head = build_classifier_head(600, config)
+    model = build_model(movinet, classifier_head)
+    model.load_weights(config.classifier_path)
+    return model
+
+def load_detector(config: Config):
+    return YOLO(config.detector_path)
+
+def compile_classifier(model, config: Config):
+    optimizer = keras.optimizers.Adam(learning_rate=config.learning_rate)
+    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
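The new classifier is the 600-way MoViNet-A0 trunk feeding the attention head, which applies a heavily damped residual (`x + y*0.01`) before normalizing and softmaxing over the six action classes. A quick shape check under the committed config; a sketch only, and `load_classifier` additionally needs the weights file at `config.classifier_path` to exist:

    import numpy as np
    from configuration import Config
    from model import build_movinet, build_classifier_head, build_model

    config = Config()
    movinet = build_movinet(600, config)        # MoViNet-A0 trunk, 600 logits
    head = build_classifier_head(600, config)   # attention head, softmax over 6 classes
    model = build_model(movinet, head)

    clip = np.zeros(config.input_shape, dtype=np.float32)  # (1, 8, 224, 224, 3)
    probs = model(clip)                         # shape (1, 6), already softmaxed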
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
-tensorflow
 numpy
 opencv-python
-
+tensorflow==2.15.0
+tf-models-official==2.15.0
 ultralytics
 imgviz
 moviepy
weights/classifier-7.keras
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:13a9436ec0971fe72b53f03d9dd57b89a7c48a4cb82380e14b298c3e2d712f50
-size 25261904