"""Gradio demo for the Smart Environmental Eye (SEE) YOLOv7 detector.

Exposes two tabs: one runs detection on a single image, the other on a short
clip cut from an uploaded video.
"""
import subprocess
import tempfile
import time
import uuid
from pathlib import Path

import cv2
import gradio as gr
# import torch
import yolov7

# Images
# torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
# torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')

# Frame rate used when the clip does not report one.
FALLBACK_FPS = 30


def _load_model(model_path):
    """Load the YOLOv7 model identified by *model_path* for CPU inference."""
    return yolov7.load(model_path, device="cpu", hf_model=True, trace=False)


def image_fn(
    image=None,
    model_path=None,
    image_size: int = 640,
    conf_threshold: float = 0.25,
    iou_threshold: float = 0.45,
):
    """
    YOLOv7 inference function

    Args:
        image: Input image
        model_path: Path to the model
        image_size: Image size
        conf_threshold: Confidence threshold
        iou_threshold: IOU threshold

    Returns:
        Rendered image
    """
    model = _load_model(model_path)
    model.conf = conf_threshold
    model.iou = iou_threshold
    results = model([image], size=image_size)
    return results.render()[0]


def video_fn(model_path, video_file, conf_thres=0.25, iou_thres=0.45, start_sec=0, duration=2):
    """
    Run YOLOv7 on a clip of a video and return the annotated clip.

    Args:
        model_path: Path to the model
        video_file: Path of the input video file
        conf_thres: Confidence threshold
        iou_thres: IOU threshold
        start_sec: Offset, in seconds, at which the clip starts
        duration: Length of the clip, in seconds

    Returns:
        Path of an H.264-encoded mp4 file containing the annotated clip. The
        file is created with ``delete=False``; the caller owns it.

    Raises:
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
        RuntimeError: If the clip cannot be opened or yields no frames.
    """
    model = _load_model(model_path)
    # Thresholds are model attributes (as in image_fn), not call arguments.
    model.conf = conf_thres
    model.iou = iou_thres

    suffix = Path(video_file).suffix
    with tempfile.NamedTemporaryFile(suffix=suffix) as clip_file, \
            tempfile.NamedTemporaryFile(suffix=".mp4") as annotated_file:
        # Cut the requested clip. "-ss" is an input option here, which resets
        # output timestamps to zero, so the length is given with "-t" (a
        # duration) rather than "-to" (an absolute output position).
        # Arguments are passed as a list so paths with spaces survive.
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-ss", str(start_sec),
                "-i", str(video_file),
                "-t", str(duration),
                "-c", "copy",
                clip_file.name,
            ],
            check=True,
        )

        # Reader of clip file
        cap = cv2.VideoCapture(clip_file.name)
        writer = None
        num_frames = 0
        try:
            if not cap.isOpened():
                raise RuntimeError(f"Could not open video clip cut from {video_file}")

            # cap.get returns 0.0 when the frame rate is unknown.
            fps = cap.get(cv2.CAP_PROP_FPS) or FALLBACK_FPS
            max_frames = duration * fps

            while num_frames < max_frames:
                ret, frame = cap.read()
                if not ret:
                    break

                # OpenCV decodes to BGR. NOTE(review): the model is assumed to
                # expect RGB arrays, as YOLOv5's AutoShape does -- confirm
                # against the installed yolov7 package.
                results = model([cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)])
                annotated = cv2.cvtColor(results.render()[0], cv2.COLOR_RGB2BGR)

                if writer is None:
                    # Size the writer from the first annotated frame so every
                    # frame written matches the size the writer was opened with.
                    height, width = annotated.shape[:2]
                    # This is an intermediary temp file where we'll write the
                    # video to; it is re-encoded with ffmpeg below because
                    # gradio does not play mp4v-encoded files reliably.
                    writer = cv2.VideoWriter(
                        annotated_file.name,
                        cv2.VideoWriter_fourcc(*"mp4v"),
                        fps,
                        (width, height),
                    )
                writer.write(annotated)
                num_frames += 1
        finally:
            cap.release()
            if writer is not None:
                writer.release()

        print("Processed {} frames".format(num_frames))
        if num_frames == 0:
            raise RuntimeError("No frames could be read from the requested clip")

        # Re-encode to H.264. Only the name is needed, so the handle is closed
        # right away and ffmpeg (-y) overwrites the empty file.
        out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
        out_file.close()
        subprocess.run(
            [
                "ffmpeg", "-y",
                "-loglevel", "quiet", "-stats",
                "-i", annotated_file.name,
                "-c:v", "libx264",
                out_file.name,
            ],
            check=True,
        )

    return out_file.name


image_interface = gr.Interface(
    fn=image_fn,
    inputs=[
        gr.inputs.Image(type="pil", label="Input Image"),
        gr.inputs.Dropdown(
            choices=[
                "alshimaa/SEE_model_yolo7",
                # "kadirnar/yolov7-v0.1",
            ],
            default="alshimaa/SEE_model_yolo7",
            label="Model",
        ),
        # The sliders below are disabled, so image_fn runs with its default
        # image size and thresholds.
        # gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size")
        # gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
        # gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold")
    ],
    outputs=gr.outputs.Image(type="filepath", label="Output Image"),
    title="Smart Environmental Eye (SEE)",
    # One column per active input component (image, model).
    examples=[
        ['image1.jpg', 'alshimaa/SEE_model_yolo7'],
        ['image2.jpg', 'alshimaa/SEE_model_yolo7'],
        ['image3.jpg', 'alshimaa/SEE_model_yolo7'],
    ],
    cache_examples=True,
    theme='huggingface',
)


video_interface = gr.Interface(
    fn=video_fn,
    # Inputs are passed positionally, so their order must match video_fn's
    # signature: (model_path, video_file). The remaining parameters use
    # video_fn's defaults.
    inputs=[
        gr.inputs.Dropdown(
            choices=[
                "alshimaa/SEE_model_yolo7",
                # "kadirnar/yolov7-v0.1",
            ],
            default="alshimaa/SEE_model_yolo7",
            label="Model",
        ),
        gr.inputs.Video(source="upload", type="mp4", label="Input Video"),
    ],
    outputs=gr.outputs.Video(type="mp4", label="Output Video"),
    # examples=[
    #     ["video.mp4", 0.25, 0.45, 0, 2],
    # ],
    title="Smart Environmental Eye (SEE)",
    cache_examples=True,
    theme='huggingface',
)


if __name__ == "__main__":
    gr.TabbedInterface(
        [image_interface, video_interface],
        ["Run on Images", "Run on Videos"],
    ).launch()