{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## YOLO Inference Code For Single Image" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import argparse\n", "import os\n", "from pathlib import Path\n", "import torch\n", "from models.common import DetectMultiBackend\n", "from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams\n", "from utils.general import LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2, increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh\n", "from utils.plots import Annotator, colors, save_one_box, save_block_box\n", "from utils.torch_utils import select_device, smart_inference_mode\n", "\n", "def load_model(weights, device, dnn, data, fp16):\n", " device = select_device(device)\n", " model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=fp16)\n", " return model\n", "\n", "def run_single_image_inference(model, img_path, save_dir, stride, names, pt, conf_thres=0.35, iou_thres=0.7, max_det=100, augment=True, visualize=False, line_thickness=1, hide_labels=False, hide_conf=False, save_conf=False, save_crop=False, save_block=True, imgsz=(640, 640), vid_stride=1, bs=1, classes=None, agnostic_nms=False, save_txt=True, save_img=True):\n", " dataset = LoadImages(img_path, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride) # Load image from file\n", " imgsz = check_img_size(imgsz, s=stride) \n", "\n", " # Run inference\n", " model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup\n", " seen, windows, dt = 0, [], (Profile(), Profile(), Profile())\n", " for path, im, im0s, vid_cap, s in dataset:\n", " with dt[0]:\n", " im = torch.from_numpy(im).to(model.device)\n", " im = im.half() if model.fp16 else im.float() # uint8 to fp16/32\n", " im /= 255 # 0 - 255 to 0.0 - 1.0\n", " print(im.shape)\n", " if len(im.shape) == 3:\n", " im = im[None] # expand for batch dim\n", "\n", " # Inference\n", " with dt[1]:\n", " visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False\n", " pred = model(im, augment=augment, visualize=visualize)\n", "\n", " # NMS\n", " with dt[2]:\n", " pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)\n", "\n", " # Second-stage classifier (optional)\n", " # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)\n", "\n", " # Process predictions\n", " for i, det in enumerate(pred): # per image\n", " seen += 1\n", " p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)\n", "\n", " p = Path(p) # to Path\n", " txt_path = save_dir\n", " s += '%gx%g ' % im.shape[2:] # print string\n", " gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh\n", " imc = im0.copy() if save_crop or save_block else im0 # for save_crop\n", " annotator = Annotator(im0, line_width=line_thickness, example=str(names))\n", " if len(det):\n", " # Rescale boxes from img_size to im0 size\n", " det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()\n", "\n", " # Print results\n", " for c in det[:, 5].unique():\n", " n = (det[:, 5] == c).sum() # detections per class\n", " s += f\"{n} {names[int(c)]}{'s' * (n > 1)}, \" # add to string\n", "\n", " # Write results\n", " for *xyxy, conf, cls in reversed(det):\n", " if save_txt: # Write to file\n", " xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh\n", 
" line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format\n", " print(line)\n", " with open(f'{txt_path}.txt', 'a') as f:\n", " f.write(('%g ' * len(line)).rstrip() % line + '\\n')\n", "\n", " if save_img or save_crop or view_img: # Add bbox to image\n", " c = int(cls) # integer class\n", " label = None if hide_labels else (names[c] if hide_conf else f'{names[c]} {conf:.2f}')\n", " annotator.box_label(xyxy, label, color=colors(c, True))\n", " if save_crop:\n", " save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True)\n", " # if save_block:\n", " # save_block_box(xyxy, imc, file=save_dir / 'block' / names[c] / f'{p.stem}.jpg', BGR=True)\n", "\n", " # Save results (image with detections)\n", " if save_img:\n", " cv2.imwrite(os.path.join(save_dir, f\"{Path(path).stem}.jpg\"), im0)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "YOLOv5 🚀 v7.0-72-g064365d Python-3.9.18 torch-1.12.1 CUDA:0 (Tesla V100-DGXS-32GB, 32505MiB)\n", "\n", "Fusing layers... \n", "YOLOv5m summary: 212 layers, 20873139 parameters, 0 gradients, 47.9 GFLOPs\n" ] } ], "source": [ "weights_path = '/home/knowledge/workspace/bhushan/application/block_diagram/blosum/yolov5/runs/train/best_all/weights/best.pt'\n", "source_path = '/home/knowledge/workspace/bhushan/test/blosum/input_image/155502.png' \n", "save_dir_path = '/home/knowledge/workspace/bhushan/application/block_diagram/blosum/yolov5/runs/detect/'\n", "\n", "model = load_model(weights_path, device='cuda:0', dnn=False, data='/home/knowledge/workspace/bhushan/application/block_diagram/blosum/yolov5/data/mydata.yaml', fp16=False)\n", "stride, names, pt = model.stride, model.names, model.pt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "torch.Size([3, 320, 640])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([3, 576, 640])\n", "(tensor(4., device='cuda:0'), 0.8008631467819214, 0.6260387897491455, 0.35388410091400146, 0.10249307751655579)\n", "(tensor(4., device='cuda:0'), 0.24969173967838287, 0.6204985976219177, 0.35141798853874207, 0.0886426568031311)\n", "(tensor(4., device='cuda:0'), 0.24660912156105042, 0.7756232619285583, 0.2786683142185211, 0.0886426568031311)\n", "(tensor(4., device='cuda:0'), 0.24722564220428467, 0.7749307751655579, 0.20591862499713898, 0.07340720295906067)\n", "(tensor(4., device='cuda:0'), 0.24907521903514862, 0.30193907022476196, 0.20468556880950928, 0.07479224354028702)\n", "(tensor(4., device='cuda:0'), 0.7990135550498962, 0.7777008414268494, 0.32305794954299927, 0.09279778599739075)\n", "(tensor(4., device='cuda:0'), 0.24907521903514862, 0.9217451810836792, 0.357583224773407, 0.09556786715984344)\n", "(tensor(4., device='cuda:0'), 0.2533908784389496, 0.619113564491272, 0.2774352729320526, 0.07756232470273972)\n", "(tensor(4., device='cuda:0'), 0.7977805137634277, 0.6198061108589172, 0.16522811353206635, 0.0484764538705349)\n", "(tensor(0., device='cuda:0'), 0.2515413165092468, 0.5484764575958252, 0.02466091327369213, 0.05263157933950424)\n", "(tensor(3., device='cuda:0'), 0.2429099828004837, 0.29986149072647095, 0.3255240321159363, 0.10664819926023483)\n", "(tensor(0., device='cuda:0'), 0.24969173967838287, 0.8490304946899414, 0.025893958285450935, 0.049861494451761246)\n", "(tensor(3., device='cuda:0'), 0.23982737958431244, 0.7756232619285583, 0.3366214632987976, 0.10249307751655579)\n", 
"(tensor(4., device='cuda:0'), 0.5221948027610779, 0.7396121621131897, 0.02096177637577057, 0.02770083025097847)\n", "(tensor(4., device='cuda:0'), 0.0462392121553421, 0.4376731216907501, 0.01849568448960781, 0.02770083025097847)\n", "(tensor(3., device='cuda:0'), 0.24599260091781616, 0.4681440591812134, 0.3267571032047272, 0.11911357194185257)\n", "(tensor(0., device='cuda:0'), 0.5184956789016724, 0.7770082950592041, 0.20345252752304077, 0.022160664200782776)\n", "(tensor(4., device='cuda:0'), 0.28113439679145813, 0.8462603688240051, 0.022194821387529373, 0.02770083025097847)\n", "(tensor(4., device='cuda:0'), 0.2817509174346924, 0.5463988780975342, 0.025893958285450935, 0.031855955719947815)\n", "(tensor(0., device='cuda:0'), 0.24907521903514862, 0.8490304946899414, 0.019728729501366615, 0.060941826552152634)\n", "(tensor(0., device='cuda:0'), 0.24845869839191437, 0.6945983171463013, 0.01849568448960781, 0.07063712179660797)\n", "(tensor(0., device='cuda:0'), 0.24907521903514862, 0.22714681923389435, 0.019728729501366615, 0.05540166050195694)\n", "(tensor(4., device='cuda:0'), 0.24722564220428467, 0.1558171808719635, 0.13440197706222534, 0.031855955719947815)\n", "(tensor(4., device='cuda:0'), 0.24969173967838287, 0.3005540072917938, 0.14180023968219757, 0.060941826552152634)\n", "(tensor(0., device='cuda:0'), 0.521578311920166, 0.621191143989563, 0.20468556880950928, 0.02631578966975212)\n", "(tensor(0., device='cuda:0'), 0.24845869839191437, 0.3795013725757599, 0.01849568448960781, 0.06371191143989563)\n", "(tensor(4., device='cuda:0'), 0.7959309220314026, 0.7728531956672668, 0.24784216284751892, 0.06371191143989563)\n", "(tensor(4., device='cuda:0'), 0.2509247958660126, 0.617035984992981, 0.22564734518527985, 0.06509695202112198)\n", "(tensor(4., device='cuda:0'), 0.24660912156105042, 0.7756232619285583, 0.14303329586982727, 0.06371191143989563)\n", "(tensor(2., device='cuda:0'), 0.24660912156105042, 0.6198061108589172, 0.3403205871582031, 0.09279778599739075)\n", "(tensor(4., device='cuda:0'), 0.7996301054954529, 0.623268723487854, 0.10480887442827225, 0.03601108118891716)\n", "(tensor(4., device='cuda:0'), 0.5147965550422668, 0.036703601479530334, 0.4500616490840912, 0.0429362878203392)\n", "(tensor(0., device='cuda:0'), 0.24845869839191437, 0.5484764575958252, 0.01849568448960781, 0.06371191143989563)\n", "(tensor(2., device='cuda:0'), 0.24660912156105042, 0.9196676015853882, 0.360049307346344, 0.0969529077410698)\n", "(tensor(4., device='cuda:0'), 0.24907521903514862, 0.9217451810836792, 0.20715166628360748, 0.06509695202112198)\n", "(tensor(4., device='cuda:0'), 0.59186190366745, 0.26454293727874756, 0.1208384707570076, 0.03878116235136986)\n", "(tensor(2., device='cuda:0'), 0.7996301054954529, 0.7756232619285583, 0.3612823784351349, 0.10249307751655579)\n", "(tensor(4., device='cuda:0'), 0.24660912156105042, 0.4674515128135681, 0.11344020068645477, 0.08725761622190475)\n", "(tensor(0., device='cuda:0'), 0.055487051606178284, 0.621191143989563, 0.07398273795843124, 0.3254847526550293)\n", "(tensor(4., device='cuda:0'), 0.34401974081993103, 0.6869806051254272, 0.1504315733909607, 0.03878116235136986)\n", "(tensor(4., device='cuda:0'), 0.33230578899383545, 0.3843490183353424, 0.13193587958812714, 0.03739612177014351)\n", "(tensor(4., device='cuda:0'), 0.5123304724693298, 0.5817174315452576, 0.1541306972503662, 0.03878116235136986)\n", "(tensor(2., device='cuda:0'), 0.7983970642089844, 0.6239612102508545, 0.3588162660598755, 0.10110803693532944)\n", "(tensor(1., device='cuda:0'), 
0.24784216284751892, 0.1585872620344162, 0.34278666973114014, 0.09833794832229614)\n", "(tensor(0., device='cuda:0'), 0.6004931926727295, 0.4376731216907501, 0.3945746123790741, 0.279778391122818)\n" ] } ], "source": [ "run_single_image_inference(model, source_path, save_dir_path, stride, names, pt)" ] },
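{ "cell_type": "markdown", "metadata": {}, "source": [ "### Reading the saved labels back (sketch)\n", "\n", "`run_single_image_inference` writes one `class x_center y_center width height` row per detection, normalized to `[0, 1]`. A minimal sketch for converting those rows back to pixel `xyxy` boxes, assuming `save_txt=True` and `save_conf=False` (the defaults above); the label path is derived from `save_dir_path` and the image stem." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Sketch: convert normalized xywh label rows back to pixel xyxy boxes\n",
"img = cv2.imread(source_path)\n",
"h, w = img.shape[:2]\n",
"label_file = Path(save_dir_path) / f\"{Path(source_path).stem}.txt\"  # file written by run_single_image_inference\n",
"boxes = []\n",
"with open(label_file) as f:\n",
"    for row in f:\n",
"        cls, xc, yc, bw, bh = map(float, row.split()[:5])  # ignores conf if present\n",
"        x1, y1 = (xc - bw / 2) * w, (yc - bh / 2) * h  # top-left corner in pixels\n",
"        x2, y2 = (xc + bw / 2) * w, (yc + bh / 2) * h  # bottom-right corner in pixels\n",
"        boxes.append((names[int(cls)], round(x1), round(y1), round(x2), round(y2)))\n",
"boxes[:3]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Batch over a folder (sketch)\n", "\n", "The same function can be looped over a directory of images; `input_dir` below is a hypothetical path, and each image gets its own label file because `txt_path` is keyed to the image stem." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Sketch: run the single-image pipeline over every PNG in a folder (input_dir is assumed)\n",
"input_dir = '/home/knowledge/workspace/bhushan/test/blosum/input_image/'\n",
"for img_file in sorted(Path(input_dir).glob('*.png')):\n",
"    run_single_image_inference(model, str(img_file), save_dir_path, stride, names, pt)" ] } ], "metadata": { "kernelspec": { "display_name": "app_block", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.18" } }, "nbformat": 4, "nbformat_minor": 2 }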