import os
import datetime

import cv2
import numpy as np
import torch
import gradio as gr
import spaces

from super_gradients.training import models
from super_gradients.training.pipelines.pipelines import DetectionPipeline
from deep_sort_torch.deep_sort.deep_sort import DeepSort
from model_tools import get_prediction, get_color

### Restore NumPy aliases removed in newer NumPy releases; some dependencies still reference them
np.float = float
np.int = int
np.object = object
np.bool = bool

upload_dir = os.getcwd() + '/uploads/'

examples = [[upload_dir + "cafe_fall.mp4", "Fall in cafe"],
            [upload_dir + "slip.mp4", "Run and Fall2"],
            [upload_dir + "skate.mp4", "Skate and Fall"],
            [upload_dir + "kitchen.mp4", "Fall in kitchen"],
            [upload_dir + "studycam.mp4", "Experiment fall"]]

#### Load the fine-tuned YOLO-NAS model (single "Fall-Detected" class)
ckpt_path = os.getcwd() + "/checkpoints/best181-8376/ckpt_latest.pth"
best_model = models.get('yolo_nas_s', num_classes=1, checkpoint_path=ckpt_path)
best_model = best_model.to("cuda" if torch.cuda.is_available() else "cpu")
# best_model = models.get("yolo_nas_s", pretrained_weights="coco")
best_model.eval()

#### Initialize tracker
tracker_model = os.getcwd() + "/checkpoints/ckpt.t7"
tracker = DeepSort(model_path=tracker_model, max_age=30, nn_budget=100,
                   max_iou_distance=0.7, max_dist=0.2)

out_path = upload_dir
filename = 'demo.webm'
classnames = ['Fall-Detected']

description = ("YOLO-NAS model that detects whether a person is falling or has fallen, "
               "with DeepSORT tracking how long each subject has been down. "
               "If the duration crosses a threshold of 5s, the bounding box turns red "
               "and the subject is labelled IMMOBILE.")


@spaces.GPU
def vid_predict(media):
    pipeline = DetectionPipeline(
        model=best_model,
        image_processor=best_model._image_processor,
        post_prediction_callback=best_model.get_post_prediction_callback(
            iou=0.25,
            conf=0.70,
            nms_top_k=100,              # example value, adjust based on your needs
            max_predictions=50,         # example value, adjust based on your needs
            multi_label_per_box=False,  # example value, adjust based on your needs
            class_agnostic_nms=False),
        class_names=best_model._class_names,
    )

    print("Running Predict")
    save_to = os.path.join(out_path, filename)

    cap = cv2.VideoCapture(media)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {media}")

    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    print('width', width)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    print('height', height)
    fps = cap.get(cv2.CAP_PROP_FPS)
    print('fps:', fps)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print('frames count:', frame_count)

    out = cv2.VideoWriter(save_to, cv2.VideoWriter_fourcc(*'VP08'), fps, (640, 640))

    fall_records = {}  # track_id -> {'start': ..., 'present': ..., 'status': ...}
    frame_id = 0
    while True:
        frame_id += 1
        if frame_id > frame_count:
            break
        print('frame_id', frame_id)

        ret, img = cap.read()
        if not ret:
            break

        img = cv2.resize(img, (640, 640), interpolation=cv2.INTER_AREA)
        width, height = img.shape[1], img.shape[0]

        ### recalibrate color channels to RGB for use in model prediction
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        overlay = img.copy()

        ### create list objects needed for tracking
        detects = []
        conffs = []

        results = get_prediction(best_model, img_rgb, pipeline)
        print(results)

        bboxes = results.bboxes_xyxy
        if len(bboxes) >= 1:
            confs = results.confidence
            labels = results.labels
            for bbox, conf, label in zip(bboxes, confs, labels):
                conf = np.round(conf, decimals=2)
                x1, y1, x2, y2 = map(int, bbox[:4])

                ### convert xyxy to centre-x, centre-y, width, height for the tracker
                bw = abs(x1 - x2)
                bh = abs(y1 - y2)
                cx, cy = x1 + bw // 2, y1 + bh // 2
                detects.append([cx, cy, bw, bh])
                conffs.append([float(conf)])

            ### Tracker
            xywhs = torch.tensor(detects)
            conffs = torch.tensor(conffs)
            tracker_results = tracker.update(xywhs, conffs, img_rgb)

            ### conduct check on fall_records: reset the immobility timer to 0 if more
            ### than 3s have lapsed since the last detected fall for a given track
            now = datetime.datetime.now()
            if len(fall_records) >= 1:
                fall_records = {track_id: item
                                if (now - item['present']).total_seconds() <= 3.0
                                else {'start': now, 'present': now}
                                for track_id, item in fall_records.items()}

            if len(tracker_results) >= 1:
                ### note: this assumes the tracker returns boxes in the same order as
                ### the detections (reasonable here, with a single class)
                for track, conf, label in zip(tracker_results, conffs, labels):
                    conf = conf.numpy()[0]
                    duration = 0
                    minute = 0
                    sec = 0
                    x1, y1, x2, y2, track_id = track
                    x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                    if track_id in fall_records:
                        ### record present time and compute how long the fall has lasted
                        present = datetime.datetime.now()
                        fall_records[track_id]['present'] = present
                        duration = int((present - fall_records[track_id]['start']).total_seconds())
                        fall_records[track_id]['status'] = 'IMMOBILE' if duration >= 5 else None
                        print(f"Frame:{frame_id} ID: {track_id} Conf: {conf} "
                              f"Duration:{duration} Status: {fall_records[track_id]['status']}")
                        print(fall_records[track_id])
                        minute, sec = divmod(duration, 60)
                    else:
                        start = datetime.datetime.now()
                        fall_records[track_id] = {'start': start, 'present': start}

                    classname = classnames[int(label)]
                    color = get_color(track_id * 20)

                    if duration < 5:
                        display_text = f"{classname} ({track_id}) {conf} Elapsed: {round(minute)}min{round(sec)}s"
                        box_color = color
                    else:
                        display_text = f"{classname} ({track_id}) {conf} IMMOBILE: {round(minute)}min{round(sec)}s"
                        box_color = (0, 0, 255)  # red once the 5s threshold is crossed

                    (w, h), _ = cv2.getTextSize(display_text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 1)
                    cv2.rectangle(img, (x1, y1), (x2, y2), box_color, 1)
                    cv2.rectangle(overlay, (x1, y1), (x2, y2), box_color, 1)
                    cv2.rectangle(overlay,
                                  (min(x1, int(width) - w), max(1, y1 - 20)),
                                  (min(x1 + w, int(width)), max(21, y1)),
                                  box_color, cv2.FILLED)
                    cv2.putText(img, display_text, (min(x1, int(width) - w), max(21, y1)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)
                    cv2.putText(overlay, display_text, (min(x1, int(width) - w), max(21, y1)),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)

        ### blend the filled label overlay onto the frame and write it out
        alpha = 0.6
        masked = cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0)
        out.write(masked)

    cap.release()
    out.release()
    cv2.destroyAllWindows()
    return save_to


def run():
    demo = gr.Interface(fn=vid_predict,
                        inputs=gr.Video(format='mp4'),
                        outputs=gr.Video(),
                        examples=examples,
                        description=description,
                        cache_examples=False,
                        title='Fall detection and tracking with deep sort')
    demo.launch(server_port=7860)


if __name__ == "__main__":
    run()
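# ---------------------------------------------------------------------------
# Optional local smoke test (a minimal sketch): bypass the Gradio UI and run
# the detection/tracking loop directly on one of the bundled example clips.
# Assumes the clip and checkpoints exist at the paths configured above.
#
#   result_path = vid_predict(upload_dir + "cafe_fall.mp4")
#   print("Annotated video written to:", result_path)
#
# Swap this in for run() under __main__ when debugging without a browser.
# ---------------------------------------------------------------------------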