"""Gradio demo: streaming aircraft detection and action recognition on video clips."""

import tempfile
import time

import cv2
import gradio as gr
import tensorflow as tf
from moviepy.editor import VideoFileClip
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip

from configuration import Config
from model import load_classifier, load_detector
from inference import format_frame, detect_object, classify_action, draw_boxes, draw_classes

config = Config()

print(f'TensorFlow {tf.__version__}')

print(f'Load classifier from {config.classifier_path}')
classifier = load_classifier(config)
classifier.trainable = False
classifier.summary()

print('Load detector.')
detector = load_detector(config)


def fn(video: str | None, tasks: list[int]):
    # Clear both outputs before processing starts.
    yield None, None
    if video is None:
        return
    print('Process video.')
    do_detect = 0 in tasks
    do_classify = 1 in tasks
    first_yield = True
    if not do_detect and not do_classify:
        # No task selected: return the input clip unchanged.
        yield None, video
        return
    # Reserve a named output file; delete=False keeps it after the handle closes.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as f:
        output = f.name
    clip = VideoFileClip(video)
    processed_frames = []
    frames = []             # Buffer of formatted frames awaiting classification.
    actions = []            # Most recent action-classification results.
    detections = ([], [])   # Most recent detection results.
    for i, frame in enumerate(clip.iter_frames()):
        if do_classify:
            if i % config.classify_action_frame_step == 0:
                frames.append(format_frame(frame, config))
            if len(frames) == config.classify_action_num_frames:
                print(f'Classify action: Until frame {i}')
                actions = classify_action(classifier, frames, config.id_to_name)
                frames = []
        if do_detect and i % config.detect_object_frame_step == 0:
            print(f'Detect object: Frame {i}')
            detections = detect_object(detector, frame)
        if do_detect:
            frame = draw_boxes(frame, detections, actions, do_classify)
        elif len(actions) > 0:
            frame = draw_classes(frame, actions)
        processed_frames.append(frame)
        if i % config.yield_frame_steps == 0:
            print(f'Yield latest frame: Frame {i}')
            # Low JPEG quality (scale 0-100) keeps the streamed preview small.
            quality = 9
            # MoviePy yields RGB frames; OpenCV expects BGR.
            image_array = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            _, image_encoded = cv2.imencode('.jpg', image_array, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
            with tempfile.NamedTemporaryFile(suffix='.jpeg') as f:
                f.write(image_encoded)
                f.flush()  # Ensure the bytes are on disk before Gradio reads the path.
                yield f.name, None
                # Brief pauses keep the preview file alive long enough for the
                # frontend to fetch it and throttle the update rate.
                if first_yield:
                    time.sleep(0.5)
                    first_yield = False
                if i % config.classify_action_frame_step != 0 and i % config.detect_object_frame_step != 0:
                    time.sleep(0.1)
    processed_clip = ImageSequenceClip(processed_frames, fps=clip.fps)
    processed_clip.audio = clip.audio
    processed_clip.write_videofile(output, fps=clip.fps, audio_codec='aac', logger=None)
    yield frame, output


inputs = [
    gr.Video(sources=['upload'], label='Input video clip'),
    gr.CheckboxGroup(
        ['Aircraft detection', 'Aircraft action recognition'],
        label='Tasks',
        info='For this demo you may also run only the detection task or only the action-recognition task.',
        type='index')]

outputs = [
    gr.Image(interactive=False, label='Latest processed frame'),
    gr.Video(interactive=False, label='Output video clip')]

examples = [
    ['examples/ZFLFDfovqls_001310_001320.mp4'],  # cspell: disable-line
    ['examples/Zv7GyH-fpEY_2023.0_2033.0.mp4']]

iface = gr.Interface(
    title='Deep-Learning-Based Aircraft Detection and Action Recognition for Video Streams',
    description=('Upload a video clip. After you submit, the latest processed frame is '
                 'streamed back in real time; once processing finishes, the annotated '
                 'clip is available for download.'),
    theme='soft',
    fn=fn,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    cache_examples=False)

iface.launch()