"""Object detection (Faster R-CNN ResNet-50 FPN v2) on images and videos with OpenCV."""

import cv2
import numpy as np
import torch
from torchvision import transforms
from torchvision.models import detection

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# COCO dataset class names; 'N/A' entries are label ids unused by the dataset,
# and index 0 is the background class.
classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

# One random color per class, used for drawing boxes and label text.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Load Faster R-CNN with a ResNet-50 FPN backbone, pretrained on COCO.
# FIX: `pretrained=`/`pretrained_backbone=` are deprecated (and removed in
# torchvision >= 0.15); this model only exists in torchvision >= 0.13 where the
# supported API is `weights=`. DEFAULT pulls the best available COCO weights.
model = detection.fasterrcnn_resnet50_fpn_v2(
    weights=detection.FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT,
    progress=True,
).to(device)
print(model.eval())  # switch to inference mode; print shows the architecture
def img_detect(img_path):
    """Detect people in a single image and draw labeled bounding boxes.

    Parameters
    ----------
    img_path : str
        Path to an image file readable by OpenCV.

    Returns
    -------
    numpy.ndarray
        The 640x480 BGR image annotated with boxes around confident
        'person' detections and a running people count.
    """
    image = cv2.imread(img_path)            # BGR uint8, HWC
    image = cv2.resize(image, (640, 480))
    orig = image.copy()                     # annotated copy returned to caller

    # Convert to the layout the model expects:
    # BGR -> RGB, HWC -> CHW, add a batch dimension, scale [0,255] -> [0,1].
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, axis=0)
    image = image / 255.0
    image = torch.FloatTensor(image).to(device)

    # Inference only: no_grad avoids building the autograd graph.
    with torch.no_grad():
        detections = model(image)[0]

    people = 0
    for i in range(len(detections["boxes"])):
        confidence = float(detections["scores"][i])
        idx = int(detections["labels"][i])
        # Keep only confident detections of the 'person' class (COCO id 1).
        if confidence > 0.75 and idx == 1:
            box = detections["boxes"][i].detach().cpu().numpy()
            (x_1, y_1, x_2, y_2) = box.astype("int")
            # FIX: format the score as a number instead of printing a raw tensor.
            label = f"{classes[idx]}, {idx}: {confidence * 100:.2f}%"
            print(f"[INFO] {label}")
            people += 1
            cv2.rectangle(orig, (x_1, y_1), (x_2, y_2), colors[idx], 2)
            # Keep the label text inside the frame when the box touches the top.
            y = y_1 - 15 if y_1 - 15 > 15 else y_1 + 15
            cv2.putText(orig, label, (x_1, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

    # FIX: the original used colors[idx] here, which raises NameError when the
    # image contains no detections; use the fixed 'person' class color instead.
    cv2.putText(orig, f"Number of People: {people}", (5, 19),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[1], 2)
    return orig


def video_detection(video_path):
    """Detect objects frame-by-frame in a video, drawing labeled boxes.

    Parameters
    ----------
    video_path : str
        Path to a video file readable by OpenCV.

    Returns
    -------
    numpy.ndarray or None
        The last annotated BGR frame, or None if the video yielded no frames.
    """
    video = cv2.VideoCapture(video_path)
    # frame_width = video.get(3)
    # frame_height = video.get(4)
    # out = cv2.VideoWriter(vid_out, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height))
    vid = None  # FIX: defined up front so the return below can't NameError
    while video.isOpened():
        ret, frame = video.read()
        # FIX: check ret BEFORE touching frame -- at end of stream frame is
        # None and the original's frame.copy() raised AttributeError.
        if not ret:
            break
        vid = frame.copy()
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        tensor = transforms.functional.to_tensor(rgb).to(device)
        with torch.no_grad():  # inference only; skip autograd bookkeeping
            vid_detect = model([tensor])[0]
        for i in range(len(vid_detect["boxes"])):
            confidence = float(vid_detect["scores"][i])
            if confidence > 0.75:
                idx = int(vid_detect["labels"][i])
                box = vid_detect["boxes"][i].detach().cpu().numpy()
                (x_1, y_1, x_2, y_2) = box.astype("int")
                label = f"{classes[idx]}, {idx}: {confidence * 100:.2f}%"
                print(f"[INFO] {label}")
                cv2.rectangle(vid, (x_1, y_1), (x_2, y_2), colors[idx], 2)
                # Keep the label text inside the frame near the top edge.
                y = y_1 - 15 if y_1 - 15 > 15 else y_1 + 15
                cv2.putText(vid, label, (x_1, y),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)
    video.release()  # FIX: free the capture handle (the original leaked it)
    return vid