import os
import datetime

import cv2
import numpy as np
import torch
import gradio as gr
import spaces

from super_gradients.training import models
from super_gradients.training.pipelines.pipelines import DetectionPipeline
from deep_sort_torch.deep_sort.deep_sort import DeepSort
from model_tools import get_prediction, get_color

### Restore NumPy aliases removed in newer NumPy releases; some dependencies still reference them
np.float = float
np.int = int
np.object = object
np.bool = bool

upload_dir = os.getcwd() + '/uploads/'

examples = [[upload_dir + "cafe_fall.mp4", "Fall in cafe"],
            [upload_dir + "slip.mp4", "Run and Fall2"],
            [upload_dir + "skate.mp4", "Skate and Fall"],
            [upload_dir + "kitchen.mp4", "Fall in kitchen"],
            [upload_dir + "studycam.mp4", "Experiment fall"]]

#### Load the fine-tuned YOLO-NAS model (single "Fall-Detected" class)
ckpt_path = os.getcwd() + "/checkpoints/best181-8376/ckpt_latest.pth"
best_model = models.get('yolo_nas_s', num_classes=1, checkpoint_path=ckpt_path)
best_model = best_model.to("cuda" if torch.cuda.is_available() else "cpu")
# best_model = models.get("yolo_nas_s", pretrained_weights="coco")
best_model.eval()

#### Initialize tracker
tracker_model = os.getcwd() + "/checkpoints/ckpt.t7"
tracker = DeepSort(model_path=tracker_model, max_age=30, nn_budget=100,
                   max_iou_distance=0.7, max_dist=0.2)

out_path = upload_dir
filename = 'demo.webm'
classnames = ['Fall-Detected']

description = ("YOLO-NAS model that detects whether a person is falling or has fallen, "
               "with DeepSORT tracking how long each subject has been down. "
               "If the duration crosses a threshold of 5s, the bounding box turns red "
               "and the subject is labelled IMMOBILE.")


@spaces.GPU
def vid_predict(media):
    pipeline = DetectionPipeline(
        model=best_model,
        image_processor=best_model._image_processor,
        post_prediction_callback=best_model.get_post_prediction_callback(
            iou=0.25,
            conf=0.70,
            nms_top_k=100,              # example value, adjust based on your needs
            max_predictions=50,         # example value, adjust based on your needs
            multi_label_per_box=False,  # example value, adjust based on your needs
            class_agnostic_nms=False),
        class_names=best_model._class_names,
    )

    print("Running Predict")
    save_to = os.path.join(out_path, filename)

    cap = cv2.VideoCapture(media)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {media}")

    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    print('width', width)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    print('height', height)
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('fps:', fps)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print('frames count:', frame_count)

    out = cv2.VideoWriter(save_to, cv2.VideoWriter_fourcc(*'VP08'), fps, (640, 640))

    fall_records = {}  # track_id -> {'start': ..., 'present': ..., 'status': ...}
    frame_id = 0
    while True:
        frame_id += 1
        if frame_id > frame_count:
            break
        print('frame_id', frame_id)

        ret, img = cap.read()
        if not ret:
            break

        img = cv2.resize(img, (640, 640), interpolation=cv2.INTER_AREA)
        width, height = img.shape[1], img.shape[0]

        ### recalibrate color channels to RGB for use in model prediction
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        overlay = img.copy()

        ### create list objects needed for tracking
        detects = []
        conffs = []

        results = get_prediction(best_model, img_rgb, pipeline)
        print(results)

        bboxes = results.bboxes_xyxy
        if len(bboxes) >= 1:
            confs = results.confidence
            labels = results.labels
            for bbox, conf, label in zip(bboxes, confs, labels):
                conf = np.round(conf, decimals=2)
                x1, y1, x2, y2 = map(int, bbox[:4])

                ### convert xyxy to centre-x, centre-y, width, height for the tracker
                bw = abs(x1 - x2)
                bh = abs(y1 - y2)
                cx, cy = x1 + bw // 2, y1 + bh // 2
                detects.append([cx, cy, bw, bh])
                conffs.append([float(conf)])

            ### Tracker
            xywhs = torch.tensor(detects)
            conffs = torch.tensor(conffs)
            tracker_results = tracker.update(xywhs, conffs, img_rgb)

            ### conduct check on fall_records: reset the immobility timer to 0 if more
            ### than 3s have lapsed since the last detected fall for a given track
            now = datetime.datetime.now()
            if len(fall_records) >= 1:
                fall_records = {track_id: item
                                if (now - item['present']).total_seconds() <= 3.0
                                else {'start': now, 'present': now}
                                for track_id, item in fall_records.items()}

            if len(tracker_results) >= 1:
                ### note: this assumes the tracker returns boxes in the same order as
                ### the detections (reasonable here, with a single class)
                for track, conf, label in zip(tracker_results, conffs, labels):
                    conf = conf.numpy()[0]
                    duration = 0
                    minute = 0
                    sec = 0
                    x1, y1, x2, y2, track_id = track
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                    if track_id in fall_records:
                        ### record present time and compute how long the fall has lasted
                        present = datetime.datetime.now()
                        fall_records[track_id]['present'] = present
                        duration = int((present - fall_records[track_id]['start']).total_seconds())
                        fall_records[track_id]['status'] = 'IMMOBILE' if duration >= 5 else None
                        print(f"Frame:{frame_id} ID: {track_id} Conf: {conf} "
                              f"Duration:{duration} Status: {fall_records[track_id]['status']}")
                        print(fall_records[track_id])
                        minute, sec = divmod(duration, 60)
                    else:
                        start = datetime.datetime.now()
                        fall_records[track_id] = {'start': start, 'present': start}

                    classname = classnames[int(label)]
                    color = get_color(track_id * 20)

                    if duration < 5:
                        display_text = f"{classname} ({track_id}) {conf} Elapsed: {round(minute)}min{round(sec)}s"
                        box_color = color
                    else:
                        display_text = f"{classname} ({track_id}) {conf} IMMOBILE: {round(minute)}min{round(sec)}s"
                        box_color = (0, 0, 255)  # red once the 5s threshold is crossed

                    (w, h), _ = cv2.getTextSize(display_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 1)
                    cv2.rectangle(img, (x1, y1), (x2, y2), box_color, 1)
                    cv2.rectangle(overlay, (x1, y1), (x2, y2), box_color, 1)
                    cv2.rectangle(overlay,
                                  (min(x1, int(width) - w), max(1, y1 - 20)),
                                  (min(x1 + w, int(width)), max(21, y1)),
                                  box_color, cv2.FILLED)
                    cv2.putText(img, display_text, (min(x1, int(width) - w), max(21, y1)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
                    cv2.putText(overlay, display_text, (min(x1, int(width) - w), max(21, y1)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)

        ### blend the filled label overlay onto the frame and write it out
        alpha = 0.6
        masked = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
        out.write(masked)

    cap.release()
    out.release()
    cv2.destroyAllWindows()
    return save_to


def run():
    demo = gr.Interface(fn=vid_predict,
                        inputs=gr.Video(format='mp4'),
                        outputs=gr.Video(),
                        examples=examples,
                        description=description,
                        cache_examples=False,
                        title='Fall detection and tracking with deep sort')
    demo.launch(server_port=7860)


if __name__ == "__main__":
    run()
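# ---------------------------------------------------------------------------
# Optional local smoke test (a minimal sketch): bypass the Gradio UI and run
# the detection/tracking loop directly on one of the bundled example clips.
# Assumes the clip and checkpoints exist at the paths configured above.
#
#   result_path = vid_predict(upload_dir + "cafe_fall.mp4")
#   print("Annotated video written to:", result_path)
#
# Swap this in for run() under __main__ when debugging without a browser.
# ---------------------------------------------------------------------------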