"""Gradio demo: YOLOv5 object detection served by a DeepSparse pipeline.

The module builds a DeepSparse ``yolo`` pipeline at import time, defines the
callback that runs detection on an uploaded image and draws the predictions
on it, and lays out a two-column Gradio Blocks UI (description on the left,
interactive demo on the right).
"""
from deepsparse import Pipeline
import time
import gradio as gr
from PIL import Image
import numpy
from annotate import *

# Long-form description rendered in the left-hand column of the UI.
markdownn = '''
# 🏞 YOLOv5 Object Detection Pipeline with DeepSparse
Object detection involves localizing objects in an image and drawing a bounding box around them.

![Object detection](https://huggingface.co/spaces/neuralmagic/cv-yolo/resolve/main/object.png)

### What is DeepSparse?
DeepSparse is an inference runtime offering GPU-class performance on CPUs and APIs to integrate ML into your application. Sparsification is a powerful technique for optimizing models for inference, reducing the compute needed with a limited accuracy tradeoff. DeepSparse is designed to take advantage of model sparsity, enabling you to deploy models with the flexibility and scalability of software on commodity CPUs with the best-in-class performance of hardware accelerators, enabling you to standardize operations and reduce infrastructure costs.

Similar to Hugging Face, DeepSparse provides off-the-shelf pipelines for computer vision and NLP that wrap the model with proper pre- and post-processing to run performantly on CPUs by using sparse models.

### Inference API Example
YOLOv5 by Ultralytics is an object detection model in the You Only Look Once (YOLO) family. YOLOv5 translated the model from Darknet to PyTorch. YOLOv5 uses data augmentation strategies, such as mosaic augmentation, that help in accurately detecting small objects. Porting the model to PyTorch also enabled training and inference in lower precision, speeding up inference and training.

Here is sample code for an YOLO object detection pipeline:
```python
from deepsparse import Pipeline
pipeline = Pipeline.create(task='yolo',model_path="zoo:cv/detection/yolov5-l/pytorch/ultralytics/coco/pruned_quant-aggressive_95",class_names=None, model_config=None, )
input_image = "my_image.png" # path to input image
inference = pipeline(input_image)
print(inference)
```

### Use Case Description
An example use case for object detection is people counting in a mall. This type of computation needs to happen in real-time. Since object detection models are usually large, this can only be achieved using expensive GPUs. What if the same can be achieved using CPUs? Sparsified and quantized YOLOv5 models enable you to achieve GPU-class performance on commodity CPUs.

Here's an illustration of the [YOLOv5 model achieving GPU-Level performance on CPUs](https://www.youtube.com/embed/gGErxSqf05o).

![Fruit Segmentation](https://huggingface.co/spaces/neuralmagic/cv-yolo/resolve/main/yolo.gif)

[Want to train a sparse model on your data? Checkout the documentation on sparse transfer learning](https://docs.neuralmagic.com/use-cases/object-detection/sparsifying)
'''

# Header shown above the interactive widgets. Kept at module level and
# unindented so the Markdown never picks up leading whitespace from the
# layout code's indentation.
# NOTE(review): the link targets the yolov5-l pruned_quant model card while
# the pipeline below loads yolov5-s pruned-aggressive_96 -- confirm which
# one is intended.
_DEMO_HEADER = """
### 🌃 YOLOv5 Object Detection Demo

Using [ ultralytics/yolov5](https://sparsezoo.neuralmagic.com/models/cv%2Fdetection%2Fyolov5-l%2Fpytorch%2Fultralytics%2Fcoco%2Fpruned_quant-aggressive_95)
"""

# Detection thresholds forwarded to the pipeline on every request.
CONFIDENCE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.5

task = "yolo"
sparse_pipeline = Pipeline.create(
    task=task,
    model_path="zoo:cv/detection/yolov5-s/pytorch/ultralytics/coco/pruned-aggressive_96",
    # if using custom model, pass in a list of classes the model will
    # classify or a path to a json file containing them
    class_names='coco',
    # if using custom model, pass in the path to a local model config file here
    model_config=None,
)


def run_pipeline(image):
    """Detect objects in ``image`` and return the annotated picture and timing.

    Args:
        image: The value delivered by the ``gr.Image`` input component
            (presumably a NumPy array, Gradio's default -- confirm), or
            ``None`` when the user has not supplied an image.

    Returns:
        A ``(annotated_image, elapsed_ms)`` tuple: a ``PIL.Image`` built from
        the output of ``annotate_image`` and the elapsed wall-clock time in
        milliseconds.

    Raises:
        gr.Error: If no image was provided.
    """
    # Clicking the button with an empty image component passes None; fail
    # with a readable message instead of an error from inside the pipeline.
    if image is None:
        raise gr.Error("Please upload or select an image first.")

    started = time.perf_counter()
    # NOTE(review): confirm these keyword names match the input schema of the
    # installed DeepSparse YOLO pipeline.
    sparse_output = sparse_pipeline(
        images=[image],
        confidence_threshold=CONFIDENCE_THRESHOLD,
        nms_threshold=NMS_THRESHOLD,
    )
    sparse_annotation = annotate_image(image=image, prediction=sparse_output)
    sparse_result = Image.fromarray(sparse_annotation)
    # The timer stops after annotation and PIL conversion, so the reported
    # figure covers inference plus drawing, not inference alone.
    elapsed_ms = (time.perf_counter() - started) * 1000.0
    return sparse_result, elapsed_ms


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown(markdownn)

        with gr.Column():
            gr.Markdown(_DEMO_HEADER)
            image = gr.Image()
            btn = gr.Button("Detect Objects with YOLOv5")
            sparse_answers = gr.Image(label="Sparse model answers")
            sparse_duration = gr.Number(label="Sparse Latency (ms):")
            gr.Examples(
                [["Fruits.png"], ["horses.jpg"], ["pets.jpg"]],
                inputs=[image],
            )

    btn.click(
        run_pipeline,
        inputs=[image],
        outputs=[sparse_answers, sparse_duration],
    )

if __name__ == "__main__":
    demo.launch()