from ultralytics import YOLO
from PIL import Image
import gradio as gr
from huggingface_hub import snapshot_download
import os
import zipfile
import cv2
from tqdm import tqdm

def load_model(repo_id):
    """Download a model snapshot from the Hugging Face Hub and load it.

    The snapshot directory returned by ``snapshot_download`` must contain
    ``best_int8_openvino_model.zip``. The archive is extracted into that same
    directory, and the ``best_int8_openvino_model`` path inside it is handed
    to ``YOLO``.

    Args:
        repo_id: Hub repository identifier passed to ``snapshot_download``.

    Returns:
        The ``YOLO`` object created with ``task='detect'``.
    """
    snapshot_dir = snapshot_download(repo_id)
    print(snapshot_dir)

    # Unpack the exported model next to the archive it was shipped in.
    archive_path = os.path.join(snapshot_dir, "best_int8_openvino_model.zip")
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(snapshot_dir)

    model_path = os.path.join(snapshot_dir, "best_int8_openvino_model")
    print(model_path)
    return YOLO(model_path, task='detect')


def predict(pilimg):
    """Run detection on a single image and return the annotated picture.

    Args:
        pilimg: Input image; the Gradio interface in this file supplies it
            as a PIL image.

    Returns:
        A PIL image built from the plotted first result, with the channel
        axis reversed so it is in RGB order.
    """
    detections = detection_model.predict(pilimg, conf=0.5, iou=0.6)
    annotated_bgr = detections[0].plot()
    # The plotted array is treated as BGR; flip the last axis to get RGB
    # before handing it to PIL.
    annotated_rgb = annotated_bgr[..., ::-1]
    return Image.fromarray(annotated_rgb)


def predict_video(video_input):
    """Annotate every frame of a video with detections and save the result.

    Frames are read with OpenCV until the stream is exhausted, passed through
    the module-level ``detection_model`` on CPU, and the plotted frames are
    written to ``"<video_input>_output.mp4"`` using the ``mp4v`` codec at the
    source resolution and frame rate.

    Args:
        video_input: Path of the input video file (as supplied by the
            ``gr.Video`` input component).

    Returns:
        Path of the annotated output video.

    Raises:
        gr.Error: If no video was provided or the file cannot be opened.
    """
    if not video_input:
        raise gr.Error("No video was provided.")

    video_reader = cv2.VideoCapture(video_input)
    if not video_reader.isOpened():
        video_reader.release()
        raise gr.Error("Could not open the uploaded video.")

    video_out_filepath = f"{video_input}_output.mp4"
    video_writer = None

    try:
        # The frame count reported by the container is only an estimate; it
        # is used for the progress bar, never to decide when to stop reading.
        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        fps = video_reader.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some files report 0 (or NaN) FPS; fall back to an arbitrary but
            # sane rate so the writer can still produce a playable file.
            fps = 30.0

        video_writer = cv2.VideoWriter(
            video_out_filepath,
            cv2.VideoWriter_fourcc(*'mp4v'),
            fps,
            (frame_w, frame_h),
        )

        with tqdm(total=nb_frames if nb_frames > 0 else None) as progress:
            while True:
                success, frame = video_reader.read()
                if not success:
                    # End of stream (or unreadable frame): stop instead of
                    # polling a reader that has nothing more to give.
                    break

                results = detection_model(frame, device='cpu')

                # Draw the detections on the frame and append it to the output.
                video_writer.write(results[0].plot())
                progress.update(1)
    finally:
        # Always release the handles, even if inference fails part-way, so
        # the files are closed and the output container is finalised.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()

    # No GUI windows are ever created here, so no destroyAllWindows()/waitKey()
    # calls are needed (they raise on headless OpenCV builds).
    return video_out_filepath


# Hub repository passed to load_model() to fetch the detection model.
REPO_ID = "GranularFireplace/food_yolov8"

# Loaded once at import time; predict() and predict_video() read this global.
detection_model = load_model(REPO_ID)

# Photo tab: a PIL image goes in, the annotated PIL image comes out.
image_interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil", label="Upload photo"),
    outputs=gr.Image(type="pil", label="Result"),
)

# Video tab: an uploaded video goes in, the annotated video comes out.
video_interface = gr.Interface(
    fn=predict_video,
    inputs=gr.Video(label="Upload video"),
    outputs=gr.Video(label="Result"),
)

# Combine both interfaces into one tabbed app and start it with sharing on.
demo = gr.TabbedInterface(
    [image_interface, video_interface],
    ["Photo", "Video"],
)
demo.launch(share=True)