import gradio as gr
import numpy as np
import torch
import cv2
import os
from vision.ssd.mobilenetv1_ssd import create_mobilenetv1_ssd, create_mobilenetv1_ssd_predictor

# Report which device torch would select (CUDA when available, else CPU).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("device: %s" % device)
torch.backends.cudnn.enabled = True
torch.backends.cudnn.benchmark = True

# Locations of the trained SSD weights and of the class-label list.
default_models = {
    "ssd": "weights/mb1-ssd-bestmodel.pth",
    "label_path": "weights/labels.txt",
}

# One class name per line; the context manager closes the file handle.
with open(default_models["label_path"]) as label_file:
    class_names = [name.strip() for name in label_file]

net = create_mobilenetv1_ssd(len(class_names), is_test=True)
try:
    net.load(default_models["ssd"])
    predictor = create_mobilenetv1_ssd_predictor(net, candidate_size=200)
except Exception as exc:
    print("The net type is wrong. It should be one of mb1-ssd and mb1-ssd-lite.")
    print(f"Failed to load model from {default_models['ssd']}: {exc!r}")
    # Fail fast: continuing would leave `predictor` undefined, so every
    # later detection call would die with a NameError instead.
    raise

# One random 3-component colour per class, used for that class's boxes.
colors = [np.random.choice(range(256), size=3) for _ in class_names]


def detection(image):
    """Run the detector on ``image`` and draw a box around each detection.

    Args:
        image: Image to analyse (presumably a NumPy array as delivered by
            the Gradio image component -- confirm against the component's
            ``type`` setting), or ``None`` when no image is available.

    Returns:
        A ``(annotated_image, summary)`` tuple. ``annotated_image`` is a
        copy of ``image`` with one rectangle per detection (``None`` when
        no image was given); ``summary`` is ``"Found N objects"``.
    """
    # The UI can call this with no image (nothing uploaded, or a cleared
    # webcam frame); report zero objects instead of crashing the predictor.
    if image is None:
        return None, "Found 0 objects"

    # NOTE(review): 10 and 0.4 are presumably the top-k limit and the
    # probability threshold -- confirm against the predictor's signature.
    boxes, labels, probs = predictor.predict(image, 10, 0.4)

    # Draw on a copy so the caller's array is left untouched.
    annotated = image.copy()
    for box, label in zip(boxes, labels):
        # Truncate coordinates to int32, then hand OpenCV plain Python
        # ints rather than NumPy scalars.
        x1, y1, x2, y2 = (int(v) for v in box.numpy().astype(np.int32))
        color = tuple(int(c) for c in colors[int(label)])
        cv2.rectangle(annotated, (x1, y1), (x2, y2), color, thickness=4)
        # Only boxes are drawn; no text label is rendered on the image.

    return annotated, f"Found {len(probs)} objects"


title = " AISeed AI Application Demo "
description = "# A Demo of Deep Learning for Object Detection"
# os.listdir returns entries in arbitrary order; sort them so the example
# list is the same on every run.
example_list = [["examples/" + example] for example in sorted(os.listdir("examples"))]

# Build the UI: one tab for still images and one for webcam streaming.
with gr.Blocks(title=title) as demo:
    gr.Markdown(description)
    with gr.Tabs():
        with gr.TabItem("for Images"):
            with gr.Row():
                with gr.Column():
                    im = gr.Image(label="Input Image")
                    im_2 = gr.Image(label="Output Image")
                with gr.Column():
                    text = gr.Textbox(label="Number of objects")
                    btn1 = gr.Button(value="Who wears mask?")
                    # Clicking the button runs detection on the input image
                    # and fills both the output image and the count textbox.
                    btn1.click(detection, inputs=[im], outputs=[im_2, text])

                    # Sample inputs taken from the "examples" directory.
                    gr.Examples(examples=example_list,
                                inputs=[im],
                                outputs=[im_2])
        # with gr.TabItem("for Videos"):
        #     with gr.Row():
        #         with gr.Column():
        #             text1 = gr.Textbox(label="Number of objects")
        #         with gr.Column():
        #             text2 = gr.Textbox(label="Number of objects")

        with gr.Tab("for streaming"):
            with gr.Row():
                input_video = gr.Image(source="webcam", streaming=True)
                with gr.Column():
                    output_video = gr.Image(label="Video")
                    text1 = gr.Textbox(label="Number of objects")
                    # Re-run detection whenever the webcam component's value
                    # changes; the progress indicator is turned off.
                    input_video.change(detection, inputs=[input_video],
                                       outputs=[output_video, text1],
                                       show_progress=False)


if __name__ == "__main__":
    demo.launch()