import subprocess
import tempfile
import time
from pathlib import Path

import cv2
import gradio as gr
import yolov7
# import torch

# Images
# torch.hub.download_url_to_file('https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg', 'zidane.jpg')
# torch.hub.download_url_to_file('https://raw.githubusercontent.com/obss/sahi/main/tests/data/small-vehicles1.jpeg', 'small-vehicles1.jpeg')


def image_fn(
    image: gr.inputs.Image = None,
    model_path: gr.inputs.Dropdown = None,
    image_size: gr.inputs.Slider = 640,
    conf_threshold: gr.inputs.Slider = 0.25,
    iou_threshold: gr.inputs.Slider = 0.45,
):
    """
    YOLOv7 inference function

    Args:
        image: Input image
        model_path: Path to the model
        image_size: Image size
        conf_threshold: Confidence threshold
        iou_threshold: IOU threshold

    Returns:
        Rendered image
    """
    model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
    model.conf = conf_threshold
    model.iou = iou_threshold
    results = model([image], size=image_size)
    return results.render()[0]
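# A minimal, hypothetical smoke test for image_fn outside of Gradio. It assumes
# image1.jpg (one of the interface examples below) sits next to this script and
# that the "alshimaa/model_baseline" weights can be fetched from the Hub:
#
#   from PIL import Image
#   rendered = image_fn(Image.open("image1.jpg"), "alshimaa/model_baseline")
#   Image.fromarray(rendered).save("image1_pred.jpg")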
def video_fn(video_file, model_path, conf_thres=0.25, iou_thres=0.45, start_sec=0, duration=2):
    model = yolov7.load(model_path, device="cpu", hf_model=True, trace=False)
    # Thresholds are applied as model attributes, same as in image_fn
    model.conf = conf_thres
    model.iou = iou_thres

    start_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec))
    end_timestamp = time.strftime("%H:%M:%S", time.gmtime(start_sec + duration))

    suffix = Path(video_file).suffix
    clip_temp_file = tempfile.NamedTemporaryFile(suffix=suffix)
    # Trim the requested segment out of the uploaded video with ffmpeg
    subprocess.call(
        f"ffmpeg -y -ss {start_timestamp} -i {video_file} -to {end_timestamp} -c copy {clip_temp_file.name}".split()
    )

    # Reader of clip file
    cap = cv2.VideoCapture(clip_temp_file.name)

    # This is an intermediary temp file where we'll write the video to.
    # Unfortunately, gradio doesn't play too nice with videos rn, so we have to do some hackiness
    # with ffmpeg at the end of the function here.
    with tempfile.NamedTemporaryFile(suffix=".mp4") as temp_file:
        # NOTE: the writer assumes 30 fps and 1280x720 frames
        out = cv2.VideoWriter(temp_file.name, cv2.VideoWriter_fourcc(*"MP4V"), 30, (1280, 720))

        num_frames = 0
        max_frames = duration * 30
        while cap.isOpened():
            try:
                ret, frame = cap.read()
                if not ret:
                    break
            except Exception as e:
                print(e)
                continue
            print("FRAME DTYPE", type(frame))
            # Run inference on the frame and write the rendered (annotated) frame out
            results = model(frame)
            out.write(results.render()[0])
            num_frames += 1
            print("Processed {} frames".format(num_frames))
            if num_frames == max_frames:
                break
        out.release()

        # Aforementioned hackiness: re-encode with libx264 so the browser can play the result
        out_file = tempfile.NamedTemporaryFile(suffix="out.mp4", delete=False)
        subprocess.run(
            f"ffmpeg -y -loglevel quiet -stats -i {temp_file.name} -c:v libx264 {out_file.name}".split()
        )
        return out_file.name


image_interface = gr.Interface(
    fn=image_fn,
    inputs=[
        gr.inputs.Image(type="pil", label="Input Image"),
        gr.inputs.Dropdown(
            choices=[
                "alshimaa/model_baseline",
                "alshimaa/model_yolo7",
                # "kadirnar/yolov7-v0.1",
            ],
            default="alshimaa/model_baseline",
            label="Model",
        ),
        # gr.inputs.Slider(minimum=320, maximum=1280, default=640, step=32, label="Image Size"),
        # gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.25, step=0.05, label="Confidence Threshold"),
        # gr.inputs.Slider(minimum=0.0, maximum=1.0, default=0.45, step=0.05, label="IOU Threshold"),
    ],
    outputs=gr.outputs.Image(type="filepath", label="Output Image"),
    title="Smart Environmental Eye (SEE)",
    # Examples supply values only for the two active inputs; the commented-out
    # sliders fall back to image_fn's defaults (640 / 0.25 / 0.45).
    examples=[
        ["image1.jpg", "alshimaa/model_yolo7"],
        ["image2.jpg", "alshimaa/model_yolo7"],
        ["image3.jpg", "alshimaa/model_yolo7"],
    ],
    cache_examples=True,
    theme='huggingface',
)

video_interface = gr.Interface(
    fn=video_fn,
    inputs=[
        gr.Video(type="file"),
        gr.inputs.Dropdown(
            choices=[
                "alshimaa/model_baseline",
                "alshimaa/model_yolo7",
                # "kadirnar/yolov7-v0.1",
            ],
            default="alshimaa/model_baseline",
            label="Model",
        ),
    ],
    outputs=gr.outputs.Video(type="filepath", format="mp4", label="Output Video"),
    # examples=[
    #     ["video.mp4", 0.25, 0.45, 0, 2],
    # ],
    title="Smart Environmental Eye (SEE)",
    cache_examples=True,
    theme='huggingface',
)


if __name__ == "__main__":
    gr.TabbedInterface(
        [image_interface, video_interface],
        ["Run on Images", "Run on Videos"],
    ).launch()
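# Hypothetical direct call to video_fn, bypassing the Gradio UI ("video.mp4" is
# a placeholder path, not shipped with this repo):
#
#   out_path = video_fn("video.mp4", "alshimaa/model_baseline",
#                       conf_thres=0.25, iou_thres=0.45, start_sec=0, duration=2)
#   print(out_path)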
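# Rough dependency sketch (an assumption, not an official requirements.txt):
# the yolov7.load(..., hf_model=True) call appears to come from the pip-installable
# YOLOv7 wrapper (PyPI package "yolov7detect"), and video_fn shells out to ffmpeg,
# so something like the following is needed alongside gradio:
#
#   gradio
#   yolov7detect
#   opencv-python
#
# plus an ffmpeg binary available on the PATH.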