Testys committed on
Commit 59d1adb
1 Parent(s): 7124dcc

Committing obj files

Files changed (4)
  1. deploy.py +44 -0
  2. faster_rcnn.ipynb +899 -0
  3. faster_rcnn.py +108 -0
  4. readme.md +3 -0
deploy.py ADDED
@@ -0,0 +1,44 @@
+ import gradio as gr
+ from faster_rcnn import img_detect, video_detection
+
+
+ # Detection function: routes to the appropriate detector based on the dropdown choice
+ def detect(choice, image_path, video_path):
+     if choice == "Video":
+         return None, video_detection(video_path)
+     return img_detect(image_path), None  # default to image detection
+
+
+ # Build the Gradio interface using the Blocks layout
+ with gr.Blocks(title="Object Detector") as demo:
+     choice = gr.Dropdown(
+         choices=["Image", "Video"],
+         value="Image",
+         label="Would you like to detect objects in an image or a video?",
+     )
+     image_in = gr.Image(type="filepath", label="Image")  # img_detect expects a file path
+     video_in = gr.Video(label="Video")
+     image_out = gr.Image(label="Detected image")
+     video_out = gr.PlayableVideo(label="Detected video")
+     det = gr.Button("Detect")
+     det.click(detect, inputs=[choice, image_in, video_in], outputs=[image_out, video_out])
+
+ if __name__ == "__main__":
+     demo.launch()
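As a quick sanity check, the routing in deploy.py can be exercised without launching the web UI by calling detect directly. This is only a sketch; the sample file paths are placeholders, and it assumes the detect signature defined above:

import deploy

# Image branch: returns an annotated RGB array and no video
img_out, vid_out = deploy.detect("Image", "sample.jpg", None)

# Video branch: returns no image and the path of the annotated video
img_out, vid_out = deploy.detect("Video", None, "sample.mp4")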
faster_rcnn.ipynb ADDED
@@ -0,0 +1,899 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "id": "6oY4rQOq6RfM",
9
+ "pycharm": {
10
+ "is_executing": true
11
+ }
12
+ },
13
+ "outputs": [],
14
+ "source": [
15
+ "import os\n",
16
+ "import cv2\n",
17
+ "import torch\n",
18
+ "from torchvision import transforms\n",
19
+ "from torchvision.models import detection\n",
20
+ "from PIL import Image\n",
21
+ "import pickle\n",
22
+ "import numpy as np\n",
23
+ "import sys\n"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "metadata": {
30
+ "id": "QKAf8q-l6RfX",
31
+ "pycharm": {
32
+ "is_executing": true
33
+ }
34
+ },
35
+ "outputs": [],
36
+ "source": [
37
+ "# checks if their is a gpu present, if not uses a cpu\n",
38
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "metadata": {
45
+ "id": "rYthXSEK6RfZ"
46
+ },
47
+ "outputs": [],
48
+ "source": [
49
+ "# mainly consists of the classes present in the coco dataset\n",
50
+ "classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',\n",
51
+ " 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',\n",
52
+ " 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n",
53
+ " 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',\n",
54
+ " 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n",
55
+ " 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',\n",
56
+ " 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',\n",
57
+ " 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n",
58
+ " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',\n",
59
+ " 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',\n",
60
+ " 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',\n",
61
+ " 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n",
62
+ "\n",
63
+ "colors = np.random.uniform(0,255, size=(len(classes), 3)) # assigning a color to each classes of the data"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 4,
69
+ "metadata": {
70
+ "colab": {
71
+ "base_uri": "https://localhost:8080/",
72
+ "height": 225,
73
+ "referenced_widgets": [
74
+ "4fe268bf915b44038ba07a91c87a63ec",
75
+ "4e5a3302234341e0ba24b9054b129f99",
76
+ "8b1891821afc478684249870a93c961c",
77
+ "3cb0c74f631f49b4bcc3ffcdae17561f",
78
+ "7ee00c90f28643d697e697130f8e57b9",
79
+ "d02b913a20794bb7bb3c866370dcc7af",
80
+ "c7c1a147321d46f3ae3cffc419a04e2e",
81
+ "ecf3b69a10054324933edcaa322fcbfc",
82
+ "38f2071ea44a4ea991cae2303f70912d",
83
+ "541293c130504c69bb5ccf82780670d1",
84
+ "8ff4b383565e4e8c85c796561bd8a532"
85
+ ]
86
+ },
87
+ "id": "x55IKiUr6Rfc",
88
+ "outputId": "e98c3095-2629-47ac-f6d9-e6640263b111"
89
+ },
90
+ "outputs": [
91
+ {
92
+ "name": "stderr",
93
+ "output_type": "stream",
94
+ "text": [
95
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
96
+ " warnings.warn(\n",
97
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.\n",
98
+ " warnings.warn(msg)\n",
99
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:208: UserWarning: The parameter 'pretrained_backbone' is deprecated since 0.13 and may be removed in the future, please use 'weights_backbone' instead.\n",
100
+ " warnings.warn(\n",
101
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights_backbone' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights_backbone=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights_backbone=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n",
102
+ " warnings.warn(msg)\n"
103
+ ]
104
+ },
105
+ {
106
+ "data": {
107
+ "text/plain": "FasterRCNN(\n (transform): GeneralizedRCNNTransform(\n Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n Resize(min_size=(800,), max_size=1333, mode='bilinear')\n )\n (backbone): BackboneWithFPN(\n (body): IntermediateLayerGetter(\n (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (relu): ReLU(inplace=True)\n (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n (layer1): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(64, eps=0.0)\n (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(256, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (1): FrozenBatchNorm2d(256, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(64, eps=0.0)\n (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(256, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(64, eps=0.0)\n (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(256, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n (layer2): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n (1): FrozenBatchNorm2d(512, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (3): Bottleneck(\n (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n 
(bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n (layer3): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n (1): FrozenBatchNorm2d(1024, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (3): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (4): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (5): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n (layer4): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(512, eps=0.0)\n (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(512, eps=0.0)\n (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(2048, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n (1): FrozenBatchNorm2d(2048, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), 
bias=False)\n (bn1): FrozenBatchNorm2d(512, eps=0.0)\n (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(512, eps=0.0)\n (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(2048, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(512, eps=0.0)\n (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(512, eps=0.0)\n (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(2048, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n )\n (fpn): FeaturePyramidNetwork(\n (inner_blocks): ModuleList(\n (0): Conv2dNormActivation(\n (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n (1): Conv2dNormActivation(\n (0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n (2): Conv2dNormActivation(\n (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n (3): Conv2dNormActivation(\n (0): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n )\n (layer_blocks): ModuleList(\n (0-3): 4 x Conv2dNormActivation(\n (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n )\n )\n (extra_blocks): LastLevelMaxPool()\n )\n )\n (rpn): RegionProposalNetwork(\n (anchor_generator): AnchorGenerator()\n (head): RPNHead(\n (conv): Sequential(\n (0): Conv2dNormActivation(\n (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n (1): ReLU(inplace=True)\n )\n )\n (cls_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))\n (bbox_pred): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))\n )\n )\n (roi_heads): RoIHeads(\n (box_roi_pool): MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'], output_size=(7, 7), sampling_ratio=2)\n (box_head): TwoMLPHead(\n (fc6): Linear(in_features=12544, out_features=1024, bias=True)\n (fc7): Linear(in_features=1024, out_features=1024, bias=True)\n )\n (box_predictor): FastRCNNPredictor(\n (cls_score): Linear(in_features=1024, out_features=91, bias=True)\n (bbox_pred): Linear(in_features=1024, out_features=364, bias=True)\n )\n )\n)"
108
+ },
109
+ "execution_count": 4,
110
+ "metadata": {},
111
+ "output_type": "execute_result"
112
+ }
113
+ ],
114
+ "source": [
115
+ "# calling the Faster-RCNN ResNet50 model\n",
116
+ "model = detection.fasterrcnn_resnet50_fpn(pretrained=True, progress=True, pretrained_backbone=True).to(device)\n",
117
+ "model.eval() # prints out the architecture of the model"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": null,
123
+ "metadata": {
124
+ "colab": {
125
+ "background_save": true
126
+ },
127
+ "id": "-jLNrDyZ6Rfg",
128
+ "pycharm": {
129
+ "is_executing": true
130
+ }
131
+ },
132
+ "outputs": [],
133
+ "source": [
134
+ "image = cv2.imread(os.path.join(\".\",\"data\", \"fam1.HEIC\")) # reads the model using OpenCV\n",
135
+ "\n",
136
+ "image = cv2.resize(image, (640, 480))\n",
137
+ "\n",
138
+ "orig = image.copy()\n",
139
+ "\n",
140
+ "\n",
141
+ "image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # changing the colorspace from BGR to RGB (since Pytorch trains only RGB image)\n",
142
+ "image = image.transpose((2, 0, 1)) # swapping the color channels from channels last to channels first\n",
143
+ "\n",
144
+ "image = np.expand_dims(image, axis=0) # add batch dimension to the image\n",
145
+ "image = image / 255.0 # scaling image from (0,255) to (0,1)\n",
146
+ "image = torch.FloatTensor(image) # changes the numpy array to a tensor.\n",
147
+ "\n",
148
+ "\n",
149
+ "image= image.to(device)\n",
150
+ "# the image is passed to the model to get the bounding boxes\n",
151
+ "detections = model(image)[0]"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 8,
157
+ "metadata": {
158
+ "colab": {
159
+ "background_save": true
160
+ },
161
+ "id": "0FcDqO0l6Rfi",
162
+ "outputId": "0b988da8-7b38-499b-fc3e-2ca7ffdd629f"
163
+ },
164
+ "outputs": [
165
+ {
166
+ "name": "stdout",
167
+ "output_type": "stream",
168
+ "text": [
169
+ "[INFO] person, 1: 98.54804992675781%\n",
170
+ "[INFO] person, 1: 98.00418090820312%\n",
171
+ "[INFO] person, 1: 88.78005981445312%\n",
172
+ "[INFO] person, 1: 80.33210754394531%\n",
173
+ "[INFO] person, 1: 78.6150894165039%\n"
174
+ ]
175
+ },
176
+ {
177
+ "data": {
178
+ "text/plain": "32"
179
+ },
180
+ "execution_count": 8,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "people = 0\n",
187
+ "for i in range(0, len(detections[\"boxes\"])):\n",
188
+ " confidence = detections[\"scores\"][i] # get confidence score of each object in the image\n",
189
+ " idx = int(detections[\"labels\"][i]) # identifying the id of each of the classes in the image\n",
190
+ " box = detections[\"boxes\"][i].detach().cpu().numpy() # gets the coordinates for the bounding boxes\n",
191
+ " (X_1, Y_1, X_2, Y_2) = box.astype(\"int\")\n",
192
+ "\n",
193
+ " if confidence > 0.75 and idx == 1:\n",
194
+ " # matching the label index with its classes and its probability\n",
195
+ " label = f\"{classes[idx]}, {idx}: {confidence* 100}%\"\n",
196
+ " print(f\"[INFO] {label}\")\n",
197
+ " people += 1\n",
198
+ " cv2.rectangle(orig, (X_1, Y_1), (X_2, Y_2), colors[idx], 2) # draw bounding boxes over each object\n",
199
+ " y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15\n",
200
+ "\n",
201
+ " # adds the label text to the image.\n",
202
+ " cv2.putText(orig, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)\n",
203
+ " print(f\"People: {people}\")\n",
204
+ " cv2.putText(orig, f\"Number of People: {people}\", (5, 19), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)\n",
205
+ "\n",
206
+ "cv2.imwrite(\"./data/detected_img.jpg\", orig)\n",
207
+ "cv2.imshow(\"Image Detection\", orig)\n",
208
+ "cv2.waitKey(0)"
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "execution_count": 7,
214
+ "metadata": {
215
+ "colab": {
216
+ "background_save": true
217
+ },
218
+ "id": "Vf0X-Azk6Rfk"
219
+ },
220
+ "outputs": [],
221
+ "source": [
222
+ "# implementation for videos\n",
223
+ "def video_processing(video_path):\n",
224
+ " video = cv2.VideoCapture(video_path)\n",
225
+ " frame_width = int(video.get(3))\n",
226
+ " frame_height = int(video.get(4))\n",
227
+ " \n",
228
+ " # Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file.\n",
229
+ " out = cv2.VideoWriter('./data/outpy.mp4',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))\n",
230
+ "\n",
231
+ " while video.isOpened():\n",
232
+ " ret, frame = video.read()\n",
233
+ " vid = frame.copy()\n",
234
+ " if not ret:\n",
235
+ " break\n",
236
+ " frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n",
237
+ " frame = transforms.functional.to_tensor(frame)\n",
238
+ " frame = frame.to(device)\n",
239
+ " vid_detect = model([frame])[0] \n",
240
+ "\n",
241
+ " # return vid, vid_detect\n",
242
+ " for i in range(0, len(vid_detect[\"boxes\"])):\n",
243
+ " confidence = vid_detect[\"scores\"][i]\n",
244
+ "\n",
245
+ " if confidence > 0.75:\n",
246
+ " idx = int(vid_detect[\"labels\"][i])\n",
247
+ " box = vid_detect[\"boxes\"][i].detach().cpu().numpy()\n",
248
+ " (X_1, Y_1, X_2, Y_2) = box.astype(\"int\")\n",
249
+ "\n",
250
+ " label = f\"{classes[idx]}, {idx}: {confidence* 100}%\"\n",
251
+ " print(f\"[INFO] {label}\")\n",
252
+ "\n",
253
+ " cv2.rectangle(vid, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)\n",
254
+ " y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15\n",
255
+ "\n",
256
+ " cv2.putText(vid, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)\n",
257
+ "\n",
258
+ " # cv2.imwrite(\"vid_detection.mp4\", vid)\n",
259
+ " \n",
260
+ " # cv2.imshow(\"Video Detection\", vid)\n",
261
+ " # key = cv2.waitKey(40) & 0xFF\n",
262
+ " # if key == ord(\" \"):\n",
263
+ " # break\n",
264
+ " \n",
265
+ " out.release()\n",
266
+ " cv2.destroyAllWindows()\n"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": null,
272
+ "metadata": {
273
+ "colab": {
274
+ "background_save": true
275
+ },
276
+ "id": "pvzjfhUM6Rfm",
277
+ "pycharm": {
278
+ "is_executing": true
279
+ }
280
+ },
281
+ "outputs": [
282
+ {
283
+ "name": "stdout",
284
+ "output_type": "stream",
285
+ "text": [
286
+ "[INFO] person, 1: 99.96479797363281%\n",
287
+ "[INFO] person, 1: 99.8873291015625%\n",
288
+ "[INFO] tie, 32: 84.8359375%\n",
289
+ "[INFO] skateboard, 41: 84.439697265625%\n",
290
+ "[INFO] person, 1: 99.8567123413086%\n",
291
+ "[INFO] tie, 32: 88.97736358642578%\n",
292
+ "[INFO] person, 1: 99.85549926757812%\n",
293
+ "[INFO] tie, 32: 85.5542221069336%\n",
294
+ "[INFO] person, 1: 99.85945892333984%\n",
295
+ "[INFO] tie, 32: 76.01469421386719%\n",
296
+ "[INFO] person, 1: 99.91976928710938%\n",
297
+ "[INFO] tie, 32: 75.80422973632812%\n",
298
+ "[INFO] person, 1: 99.92200469970703%\n",
299
+ "[INFO] person, 1: 99.91909790039062%\n",
300
+ "[INFO] person, 1: 99.87517547607422%\n",
301
+ "[INFO] person, 1: 99.95360565185547%\n",
302
+ "[INFO] person, 1: 99.94219207763672%\n",
303
+ "[INFO] person, 1: 99.911865234375%\n",
304
+ "[INFO] person, 1: 99.9067153930664%\n",
305
+ "[INFO] person, 1: 99.92170715332031%\n",
306
+ "[INFO] person, 1: 99.92486572265625%\n",
307
+ "[INFO] tie, 32: 81.37924194335938%\n",
308
+ "[INFO] person, 1: 99.9447021484375%\n",
309
+ "[INFO] person, 1: 99.90923309326172%\n",
310
+ "[INFO] person, 1: 99.9104232788086%\n",
311
+ "[INFO] person, 1: 99.9041976928711%\n",
312
+ "[INFO] person, 1: 99.92522430419922%\n",
313
+ "[INFO] person, 1: 99.90491485595703%\n",
314
+ "[INFO] person, 1: 99.91775512695312%\n",
315
+ "[INFO] person, 1: 99.91704559326172%\n",
316
+ "[INFO] person, 1: 99.8878173828125%\n",
317
+ "[INFO] person, 1: 99.89239501953125%\n",
318
+ "[INFO] person, 1: 99.89933013916016%\n",
319
+ "[INFO] tie, 32: 77.39177703857422%\n",
320
+ "[INFO] person, 1: 99.92431640625%\n",
321
+ "[INFO] tie, 32: 86.86376190185547%\n",
322
+ "[INFO] person, 1: 99.9134750366211%\n",
323
+ "[INFO] tie, 32: 90.91716766357422%\n",
324
+ "[INFO] person, 1: 99.86553192138672%\n",
325
+ "[INFO] tie, 32: 83.31379699707031%\n",
326
+ "[INFO] person, 1: 99.89295959472656%\n",
327
+ "[INFO] tie, 32: 82.80715942382812%\n",
328
+ "[INFO] person, 1: 99.93063354492188%\n",
329
+ "[INFO] tie, 32: 84.20972442626953%\n",
330
+ "[INFO] person, 1: 99.90483856201172%\n",
331
+ "[INFO] tie, 32: 86.09645080566406%\n",
332
+ "[INFO] person, 1: 99.91349792480469%\n",
333
+ "[INFO] tie, 32: 86.98382568359375%\n",
334
+ "[INFO] person, 1: 99.92369079589844%\n",
335
+ "[INFO] tie, 32: 80.03042602539062%\n",
336
+ "[INFO] person, 1: 99.92707061767578%\n",
337
+ "[INFO] tie, 32: 91.95155334472656%\n",
338
+ "[INFO] person, 1: 99.92926025390625%\n",
339
+ "[INFO] tie, 32: 88.84310150146484%\n",
340
+ "[INFO] person, 1: 99.93949890136719%\n",
341
+ "[INFO] tie, 32: 92.20435333251953%\n",
342
+ "[INFO] person, 1: 99.94140625%\n",
343
+ "[INFO] tie, 32: 91.74589538574219%\n",
344
+ "[INFO] person, 1: 99.87340545654297%\n",
345
+ "[INFO] person, 1: 99.90251159667969%\n",
346
+ "[INFO] tie, 32: 77.56298065185547%\n",
347
+ "[INFO] person, 1: 99.92112731933594%\n",
348
+ "[INFO] tie, 32: 88.31008911132812%\n",
349
+ "[INFO] person, 1: 99.84620666503906%\n",
350
+ "[INFO] person, 1: 99.82284545898438%\n",
351
+ "[INFO] person, 1: 75.87650299072266%\n",
352
+ "[INFO] person, 1: 99.90447235107422%\n",
353
+ "[INFO] person, 1: 99.90357971191406%\n",
354
+ "[INFO] tie, 32: 87.86799621582031%\n",
355
+ "[INFO] person, 1: 99.89325714111328%\n",
356
+ "[INFO] person, 1: 99.89271545410156%\n",
357
+ "[INFO] tie, 32: 85.549560546875%\n",
358
+ "[INFO] person, 1: 99.90028381347656%\n",
359
+ "[INFO] tie, 32: 89.2087173461914%\n",
360
+ "[INFO] person, 1: 99.81275177001953%\n",
361
+ "[INFO] tie, 32: 92.40033721923828%\n",
362
+ "[INFO] person, 1: 99.83216857910156%\n",
363
+ "[INFO] tie, 32: 90.578857421875%\n",
364
+ "[INFO] person, 1: 99.80609893798828%\n",
365
+ "[INFO] skateboard, 41: 96.36973571777344%\n",
366
+ "[INFO] tie, 32: 96.11731719970703%\n",
367
+ "[INFO] skateboard, 41: 82.39974212646484%\n",
368
+ "[INFO] person, 1: 99.69965362548828%\n",
369
+ "[INFO] tie, 32: 95.91572570800781%\n",
370
+ "[INFO] person, 1: 99.69015502929688%\n",
371
+ "[INFO] tie, 32: 96.45115661621094%\n",
372
+ "[INFO] person, 1: 99.815673828125%\n",
373
+ "[INFO] tie, 32: 96.90648651123047%\n",
374
+ "[INFO] person, 1: 99.90100860595703%\n",
375
+ "[INFO] tie, 32: 85.73446655273438%\n",
376
+ "[INFO] person, 1: 99.87108612060547%\n",
377
+ "[INFO] tie, 32: 83.69398498535156%\n",
378
+ "[INFO] person, 1: 99.92547607421875%\n",
379
+ "[INFO] tie, 32: 81.19554138183594%\n",
380
+ "[INFO] person, 1: 99.88804626464844%\n",
381
+ "[INFO] person, 1: 99.8199462890625%\n",
382
+ "[INFO] tie, 32: 82.75387573242188%\n",
383
+ "[INFO] person, 1: 99.88672637939453%\n",
384
+ "[INFO] tie, 32: 88.2742919921875%\n",
385
+ "[INFO] person, 1: 99.87617492675781%\n",
386
+ "[INFO] tie, 32: 97.07515716552734%\n",
387
+ "[INFO] person, 1: 99.87181091308594%\n",
388
+ "[INFO] tie, 32: 98.3101577758789%\n",
389
+ "[INFO] person, 1: 99.88245391845703%\n",
390
+ "[INFO] tie, 32: 97.0067138671875%\n",
391
+ "[INFO] person, 1: 99.80060577392578%\n",
392
+ "[INFO] tie, 32: 97.07807922363281%\n",
393
+ "[INFO] person, 1: 99.88915252685547%\n",
394
+ "[INFO] tie, 32: 92.22811126708984%\n",
395
+ "[INFO] person, 1: 99.92195129394531%\n",
396
+ "[INFO] tie, 32: 89.54218292236328%\n",
397
+ "[INFO] person, 1: 99.9444580078125%\n",
398
+ "[INFO] tie, 32: 88.38265228271484%\n",
399
+ "[INFO] person, 1: 99.93598937988281%\n",
400
+ "[INFO] tie, 32: 85.4147720336914%\n",
401
+ "[INFO] person, 1: 99.80413055419922%\n",
402
+ "[INFO] person, 1: 99.83075714111328%\n",
403
+ "[INFO] person, 1: 99.8851089477539%\n",
404
+ "[INFO] tie, 32: 83.0019760131836%\n",
405
+ "[INFO] person, 1: 99.87308502197266%\n",
406
+ "[INFO] tie, 32: 87.57421112060547%\n",
407
+ "[INFO] person, 1: 99.92283630371094%\n",
408
+ "[INFO] tie, 32: 86.23603820800781%\n",
409
+ "[INFO] person, 1: 99.90347290039062%\n",
410
+ "[INFO] tie, 32: 75.47425842285156%\n",
411
+ "[INFO] person, 1: 99.849853515625%\n",
412
+ "[INFO] person, 1: 99.78257751464844%\n",
413
+ "[INFO] tie, 32: 84.9027328491211%\n",
414
+ "[INFO] person, 1: 99.9006118774414%\n",
415
+ "[INFO] tie, 32: 81.21675872802734%\n",
416
+ "[INFO] person, 1: 99.89569091796875%\n",
417
+ "[INFO] tie, 32: 83.1388931274414%\n",
418
+ "[INFO] person, 1: 99.8825454711914%\n",
419
+ "[INFO] tie, 32: 84.85138702392578%\n",
420
+ "[INFO] person, 1: 99.90423583984375%\n",
421
+ "[INFO] person, 1: 99.91820526123047%\n",
422
+ "[INFO] person, 1: 99.90950775146484%\n",
423
+ "[INFO] person, 1: 99.91889953613281%\n",
424
+ "[INFO] person, 1: 99.88500213623047%\n",
425
+ "[INFO] tie, 32: 85.9897689819336%\n",
426
+ "[INFO] person, 1: 99.83943176269531%\n",
427
+ "[INFO] person, 1: 99.72062683105469%\n",
428
+ "[INFO] skateboard, 41: 92.7118148803711%\n",
429
+ "[INFO] person, 1: 99.57286834716797%\n",
430
+ "[INFO] skateboard, 41: 94.51634216308594%\n",
431
+ "[INFO] person, 1: 99.65888214111328%\n",
432
+ "[INFO] skateboard, 41: 93.3429946899414%\n",
433
+ "[INFO] person, 1: 99.45445251464844%\n",
434
+ "[INFO] skateboard, 41: 87.95608520507812%\n",
435
+ "[INFO] person, 1: 99.67414855957031%\n",
436
+ "[INFO] tie, 32: 89.96955108642578%\n",
437
+ "[INFO] skateboard, 41: 79.02975463867188%\n",
438
+ "[INFO] person, 1: 99.56875610351562%\n",
439
+ "[INFO] person, 1: 99.49411010742188%\n",
440
+ "[INFO] person, 1: 99.68108367919922%\n",
441
+ "[INFO] person, 1: 99.48446655273438%\n",
442
+ "[INFO] person, 1: 99.75424194335938%\n",
443
+ "[INFO] person, 1: 99.55342102050781%\n",
444
+ "[INFO] person, 1: 99.61231231689453%\n",
445
+ "[INFO] person, 1: 99.55381774902344%\n",
446
+ "[INFO] person, 1: 99.65033721923828%\n",
447
+ "[INFO] person, 1: 99.75348663330078%\n",
448
+ "[INFO] person, 1: 99.45124053955078%\n",
449
+ "[INFO] person, 1: 99.50060272216797%\n",
450
+ "[INFO] person, 1: 99.36222076416016%\n",
451
+ "[INFO] person, 1: 99.62764739990234%\n",
452
+ "[INFO] person, 1: 99.55170440673828%\n",
453
+ "[INFO] person, 1: 99.5832290649414%\n",
454
+ "[INFO] person, 1: 99.52233123779297%\n",
455
+ "[INFO] person, 1: 99.27761840820312%\n",
456
+ "[INFO] person, 1: 78.00102233886719%\n",
457
+ "[INFO] person, 1: 99.5089111328125%\n",
458
+ "[INFO] person, 1: 99.4920883178711%\n",
459
+ "[INFO] person, 1: 99.27015686035156%\n"
460
+ ]
461
+ }
462
+ ],
463
+ "source": [
464
+ "video_processing(\"./data/a1.mp4\")"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": null,
470
+ "metadata": {
471
+ "id": "_cKB9rwY6Rfo",
472
+ "pycharm": {
473
+ "is_executing": true
474
+ }
475
+ },
476
+ "outputs": [],
477
+ "source": [
478
+ "vid= cv2.VideoCapture(\"ai.mp4\")\n",
479
+ "ret = True\n",
480
+ "while ret:\n",
481
+ " ret, frame = vid.read()\n",
482
+ "\n",
483
+ " if ret:\n",
484
+ " cv2.imshow(\"Video Window\", frame)\n",
485
+ " cv2.waitKey(40)\n",
486
+ "\n",
487
+ "# vid.release()\n",
488
+ "# cv2.destroyAllWindows()\n"
489
+ ]
490
+ },
491
+ {
492
+ "cell_type": "code",
493
+ "execution_count": 2,
494
+ "metadata": {
495
+ "id": "OHtxLb7kg8tf"
496
+ },
497
+ "outputs": [],
498
+ "source": []
499
+ },
500
+ {
501
+ "cell_type": "code",
502
+ "execution_count": null,
503
+ "outputs": [],
504
+ "source": [],
505
+ "metadata": {
506
+ "collapsed": false,
507
+ "pycharm": {
508
+ "name": "#%%\n",
509
+ "is_executing": true
510
+ }
511
+ }
512
+ },
513
+ {
514
+ "cell_type": "code",
515
+ "execution_count": null,
516
+ "outputs": [],
517
+ "source": [],
518
+ "metadata": {
519
+ "collapsed": false,
520
+ "pycharm": {
521
+ "name": "#%%\n",
522
+ "is_executing": true
523
+ }
524
+ }
525
+ }
526
+ ],
527
+ "metadata": {
528
+ "accelerator": "GPU",
529
+ "colab": {
530
+ "provenance": []
531
+ },
532
+ "gpuClass": "standard",
533
+ "kernelspec": {
534
+ "display_name": "Python 3",
535
+ "language": "python",
536
+ "name": "python3"
537
+ },
538
+ "language_info": {
539
+ "codemirror_mode": {
540
+ "name": "ipython",
541
+ "version": 2
542
+ },
543
+ "file_extension": ".py",
544
+ "mimetype": "text/x-python",
545
+ "name": "python",
546
+ "nbconvert_exporter": "python",
547
+ "pygments_lexer": "ipython2",
548
+ "version": "2.7.6"
549
+ },
550
+ "widgets": {
551
+ "application/vnd.jupyter.widget-state+json": {
552
+ "38f2071ea44a4ea991cae2303f70912d": {
553
+ "model_module": "@jupyter-widgets/controls",
554
+ "model_module_version": "1.5.0",
555
+ "model_name": "ProgressStyleModel",
556
+ "state": {
557
+ "_model_module": "@jupyter-widgets/controls",
558
+ "_model_module_version": "1.5.0",
559
+ "_model_name": "ProgressStyleModel",
560
+ "_view_count": null,
561
+ "_view_module": "@jupyter-widgets/base",
562
+ "_view_module_version": "1.2.0",
563
+ "_view_name": "StyleView",
564
+ "bar_color": null,
565
+ "description_width": ""
566
+ }
567
+ },
568
+ "3cb0c74f631f49b4bcc3ffcdae17561f": {
569
+ "model_module": "@jupyter-widgets/controls",
570
+ "model_module_version": "1.5.0",
571
+ "model_name": "HTMLModel",
572
+ "state": {
573
+ "_dom_classes": [],
574
+ "_model_module": "@jupyter-widgets/controls",
575
+ "_model_module_version": "1.5.0",
576
+ "_model_name": "HTMLModel",
577
+ "_view_count": null,
578
+ "_view_module": "@jupyter-widgets/controls",
579
+ "_view_module_version": "1.5.0",
580
+ "_view_name": "HTMLView",
581
+ "description": "",
582
+ "description_tooltip": null,
583
+ "layout": "IPY_MODEL_541293c130504c69bb5ccf82780670d1",
584
+ "placeholder": "​",
585
+ "style": "IPY_MODEL_8ff4b383565e4e8c85c796561bd8a532",
586
+ "value": " 160M/160M [00:02<00:00, 73.2MB/s]"
587
+ }
588
+ },
589
+ "4e5a3302234341e0ba24b9054b129f99": {
590
+ "model_module": "@jupyter-widgets/controls",
591
+ "model_module_version": "1.5.0",
592
+ "model_name": "HTMLModel",
593
+ "state": {
594
+ "_dom_classes": [],
595
+ "_model_module": "@jupyter-widgets/controls",
596
+ "_model_module_version": "1.5.0",
597
+ "_model_name": "HTMLModel",
598
+ "_view_count": null,
599
+ "_view_module": "@jupyter-widgets/controls",
600
+ "_view_module_version": "1.5.0",
601
+ "_view_name": "HTMLView",
602
+ "description": "",
603
+ "description_tooltip": null,
604
+ "layout": "IPY_MODEL_d02b913a20794bb7bb3c866370dcc7af",
605
+ "placeholder": "​",
606
+ "style": "IPY_MODEL_c7c1a147321d46f3ae3cffc419a04e2e",
607
+ "value": "100%"
608
+ }
609
+ },
610
+ "4fe268bf915b44038ba07a91c87a63ec": {
611
+ "model_module": "@jupyter-widgets/controls",
612
+ "model_module_version": "1.5.0",
613
+ "model_name": "HBoxModel",
614
+ "state": {
615
+ "_dom_classes": [],
616
+ "_model_module": "@jupyter-widgets/controls",
617
+ "_model_module_version": "1.5.0",
618
+ "_model_name": "HBoxModel",
619
+ "_view_count": null,
620
+ "_view_module": "@jupyter-widgets/controls",
621
+ "_view_module_version": "1.5.0",
622
+ "_view_name": "HBoxView",
623
+ "box_style": "",
624
+ "children": [
625
+ "IPY_MODEL_4e5a3302234341e0ba24b9054b129f99",
626
+ "IPY_MODEL_8b1891821afc478684249870a93c961c",
627
+ "IPY_MODEL_3cb0c74f631f49b4bcc3ffcdae17561f"
628
+ ],
629
+ "layout": "IPY_MODEL_7ee00c90f28643d697e697130f8e57b9"
630
+ }
631
+ },
632
+ "541293c130504c69bb5ccf82780670d1": {
633
+ "model_module": "@jupyter-widgets/base",
634
+ "model_module_version": "1.2.0",
635
+ "model_name": "LayoutModel",
636
+ "state": {
637
+ "_model_module": "@jupyter-widgets/base",
638
+ "_model_module_version": "1.2.0",
639
+ "_model_name": "LayoutModel",
640
+ "_view_count": null,
641
+ "_view_module": "@jupyter-widgets/base",
642
+ "_view_module_version": "1.2.0",
643
+ "_view_name": "LayoutView",
644
+ "align_content": null,
645
+ "align_items": null,
646
+ "align_self": null,
647
+ "border": null,
648
+ "bottom": null,
649
+ "display": null,
650
+ "flex": null,
651
+ "flex_flow": null,
652
+ "grid_area": null,
653
+ "grid_auto_columns": null,
654
+ "grid_auto_flow": null,
655
+ "grid_auto_rows": null,
656
+ "grid_column": null,
657
+ "grid_gap": null,
658
+ "grid_row": null,
659
+ "grid_template_areas": null,
660
+ "grid_template_columns": null,
661
+ "grid_template_rows": null,
662
+ "height": null,
663
+ "justify_content": null,
664
+ "justify_items": null,
665
+ "left": null,
666
+ "margin": null,
667
+ "max_height": null,
668
+ "max_width": null,
669
+ "min_height": null,
670
+ "min_width": null,
671
+ "object_fit": null,
672
+ "object_position": null,
673
+ "order": null,
674
+ "overflow": null,
675
+ "overflow_x": null,
676
+ "overflow_y": null,
677
+ "padding": null,
678
+ "right": null,
679
+ "top": null,
680
+ "visibility": null,
681
+ "width": null
682
+ }
683
+ },
684
+ "7ee00c90f28643d697e697130f8e57b9": {
685
+ "model_module": "@jupyter-widgets/base",
686
+ "model_module_version": "1.2.0",
687
+ "model_name": "LayoutModel",
688
+ "state": {
689
+ "_model_module": "@jupyter-widgets/base",
690
+ "_model_module_version": "1.2.0",
691
+ "_model_name": "LayoutModel",
692
+ "_view_count": null,
693
+ "_view_module": "@jupyter-widgets/base",
694
+ "_view_module_version": "1.2.0",
695
+ "_view_name": "LayoutView",
696
+ "align_content": null,
697
+ "align_items": null,
698
+ "align_self": null,
699
+ "border": null,
700
+ "bottom": null,
701
+ "display": null,
702
+ "flex": null,
703
+ "flex_flow": null,
704
+ "grid_area": null,
705
+ "grid_auto_columns": null,
706
+ "grid_auto_flow": null,
707
+ "grid_auto_rows": null,
708
+ "grid_column": null,
709
+ "grid_gap": null,
710
+ "grid_row": null,
711
+ "grid_template_areas": null,
712
+ "grid_template_columns": null,
713
+ "grid_template_rows": null,
714
+ "height": null,
715
+ "justify_content": null,
716
+ "justify_items": null,
717
+ "left": null,
718
+ "margin": null,
719
+ "max_height": null,
720
+ "max_width": null,
721
+ "min_height": null,
722
+ "min_width": null,
723
+ "object_fit": null,
724
+ "object_position": null,
725
+ "order": null,
726
+ "overflow": null,
727
+ "overflow_x": null,
728
+ "overflow_y": null,
729
+ "padding": null,
730
+ "right": null,
731
+ "top": null,
732
+ "visibility": null,
733
+ "width": null
734
+ }
735
+ },
736
+ "8b1891821afc478684249870a93c961c": {
737
+ "model_module": "@jupyter-widgets/controls",
738
+ "model_module_version": "1.5.0",
739
+ "model_name": "FloatProgressModel",
740
+ "state": {
741
+ "_dom_classes": [],
742
+ "_model_module": "@jupyter-widgets/controls",
743
+ "_model_module_version": "1.5.0",
744
+ "_model_name": "FloatProgressModel",
745
+ "_view_count": null,
746
+ "_view_module": "@jupyter-widgets/controls",
747
+ "_view_module_version": "1.5.0",
748
+ "_view_name": "ProgressView",
749
+ "bar_style": "success",
750
+ "description": "",
751
+ "description_tooltip": null,
752
+ "layout": "IPY_MODEL_ecf3b69a10054324933edcaa322fcbfc",
753
+ "max": 167502836,
754
+ "min": 0,
755
+ "orientation": "horizontal",
756
+ "style": "IPY_MODEL_38f2071ea44a4ea991cae2303f70912d",
757
+ "value": 167502836
758
+ }
759
+ },
760
+ "8ff4b383565e4e8c85c796561bd8a532": {
761
+ "model_module": "@jupyter-widgets/controls",
762
+ "model_module_version": "1.5.0",
763
+ "model_name": "DescriptionStyleModel",
764
+ "state": {
765
+ "_model_module": "@jupyter-widgets/controls",
766
+ "_model_module_version": "1.5.0",
767
+ "_model_name": "DescriptionStyleModel",
768
+ "_view_count": null,
769
+ "_view_module": "@jupyter-widgets/base",
770
+ "_view_module_version": "1.2.0",
771
+ "_view_name": "StyleView",
772
+ "description_width": ""
773
+ }
774
+ },
775
+ "c7c1a147321d46f3ae3cffc419a04e2e": {
776
+ "model_module": "@jupyter-widgets/controls",
777
+ "model_module_version": "1.5.0",
778
+ "model_name": "DescriptionStyleModel",
779
+ "state": {
780
+ "_model_module": "@jupyter-widgets/controls",
781
+ "_model_module_version": "1.5.0",
782
+ "_model_name": "DescriptionStyleModel",
783
+ "_view_count": null,
784
+ "_view_module": "@jupyter-widgets/base",
785
+ "_view_module_version": "1.2.0",
786
+ "_view_name": "StyleView",
787
+ "description_width": ""
788
+ }
789
+ },
790
+ "d02b913a20794bb7bb3c866370dcc7af": {
791
+ "model_module": "@jupyter-widgets/base",
792
+ "model_module_version": "1.2.0",
793
+ "model_name": "LayoutModel",
794
+ "state": {
795
+ "_model_module": "@jupyter-widgets/base",
796
+ "_model_module_version": "1.2.0",
797
+ "_model_name": "LayoutModel",
798
+ "_view_count": null,
799
+ "_view_module": "@jupyter-widgets/base",
800
+ "_view_module_version": "1.2.0",
801
+ "_view_name": "LayoutView",
802
+ "align_content": null,
803
+ "align_items": null,
804
+ "align_self": null,
805
+ "border": null,
806
+ "bottom": null,
807
+ "display": null,
808
+ "flex": null,
809
+ "flex_flow": null,
810
+ "grid_area": null,
811
+ "grid_auto_columns": null,
812
+ "grid_auto_flow": null,
813
+ "grid_auto_rows": null,
814
+ "grid_column": null,
815
+ "grid_gap": null,
816
+ "grid_row": null,
817
+ "grid_template_areas": null,
818
+ "grid_template_columns": null,
819
+ "grid_template_rows": null,
820
+ "height": null,
821
+ "justify_content": null,
822
+ "justify_items": null,
823
+ "left": null,
824
+ "margin": null,
825
+ "max_height": null,
826
+ "max_width": null,
827
+ "min_height": null,
828
+ "min_width": null,
829
+ "object_fit": null,
830
+ "object_position": null,
831
+ "order": null,
832
+ "overflow": null,
833
+ "overflow_x": null,
834
+ "overflow_y": null,
835
+ "padding": null,
836
+ "right": null,
837
+ "top": null,
838
+ "visibility": null,
839
+ "width": null
840
+ }
841
+ },
842
+ "ecf3b69a10054324933edcaa322fcbfc": {
843
+ "model_module": "@jupyter-widgets/base",
844
+ "model_module_version": "1.2.0",
845
+ "model_name": "LayoutModel",
846
+ "state": {
847
+ "_model_module": "@jupyter-widgets/base",
848
+ "_model_module_version": "1.2.0",
849
+ "_model_name": "LayoutModel",
850
+ "_view_count": null,
851
+ "_view_module": "@jupyter-widgets/base",
852
+ "_view_module_version": "1.2.0",
853
+ "_view_name": "LayoutView",
854
+ "align_content": null,
855
+ "align_items": null,
856
+ "align_self": null,
857
+ "border": null,
858
+ "bottom": null,
859
+ "display": null,
860
+ "flex": null,
861
+ "flex_flow": null,
862
+ "grid_area": null,
863
+ "grid_auto_columns": null,
864
+ "grid_auto_flow": null,
865
+ "grid_auto_rows": null,
866
+ "grid_column": null,
867
+ "grid_gap": null,
868
+ "grid_row": null,
869
+ "grid_template_areas": null,
870
+ "grid_template_columns": null,
871
+ "grid_template_rows": null,
872
+ "height": null,
873
+ "justify_content": null,
874
+ "justify_items": null,
875
+ "left": null,
876
+ "margin": null,
877
+ "max_height": null,
878
+ "max_width": null,
879
+ "min_height": null,
880
+ "min_width": null,
881
+ "object_fit": null,
882
+ "object_position": null,
883
+ "order": null,
884
+ "overflow": null,
885
+ "overflow_x": null,
886
+ "overflow_y": null,
887
+ "padding": null,
888
+ "right": null,
889
+ "top": null,
890
+ "visibility": null,
891
+ "width": null
892
+ }
893
+ }
894
+ }
895
+ }
896
+ },
897
+ "nbformat": 4,
898
+ "nbformat_minor": 0
899
+ }
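The notebook repeats the same preprocessing steps before each model call (resize, BGR-to-RGB conversion, channels-first layout, batch dimension, scaling to 0-1). A small helper in this spirit could bundle those steps; the function name and defaults below are illustrative and not part of the committed code:

import cv2
import numpy as np
import torch


def preprocess(bgr_image, size=(640, 480), device="cpu"):
    # resize, convert BGR to RGB, move channels first, add a batch dim, scale to [0, 1]
    image = cv2.resize(bgr_image, size)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, axis=0) / 255.0
    return torch.FloatTensor(image).to(device)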
faster_rcnn.py ADDED
@@ -0,0 +1,108 @@
+ # importing modules
+ import cv2
+ import torch
+ from torchvision import transforms
+ from torchvision.models import detection
+ import numpy as np
+
+ # checks if there is a GPU present; if not, uses the CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # the class labels of the COCO dataset ('N/A' marks unused ids)
+ classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+            'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
+            'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+            'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
+            'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
+            'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
+            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+            'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
+            'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+            'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
+            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
+
+ colors = np.random.uniform(0, 255, size=(len(classes), 3))  # assigning a random color to each class
+
+
+ # loading the pretrained Faster R-CNN ResNet50-FPN v2 model
+ model = detection.fasterrcnn_resnet50_fpn_v2(weights="DEFAULT", progress=True).to(device)
+ model.eval()  # switch the model to inference mode
+
+
+ # function to carry out object detection on images
+ def img_detect(img_path):
+     image = cv2.imread(img_path)  # reads the image using OpenCV
+     image = cv2.resize(image, (640, 480))
+     orig = image.copy()
+
+     # changing the colorspace from BGR to RGB (the pretrained model expects RGB input)
+     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+     image = image.transpose((2, 0, 1))  # swapping the color channels from channels-last to channels-first
+
+     image = np.expand_dims(image, axis=0)  # add a batch dimension to the image
+     image = image / 255.0  # scaling the image from (0, 255) to (0, 1)
+     image = torch.FloatTensor(image)  # changes the numpy array to a tensor
+
+     image = image.to(device)
+     detections = model(image)[0]  # the image is passed to the model to get the bounding boxes
+
+     people = 0
+     # loop to draw bounding boxes on the image
+     for i in range(0, len(detections["boxes"])):
+         confidence = detections["scores"][i]  # confidence score of each detected object
+         idx = int(detections["labels"][i])  # class id of the detection
+         box = detections["boxes"][i].detach().cpu().numpy()  # coordinates of the bounding box
+         (X_1, Y_1, X_2, Y_2) = box.astype("int")
+
+         if confidence > 0.75 and idx == 1:
+             # matching the label index with its class and its probability
+             label = f"{classes[idx]}, {idx}: {confidence * 100}%"
+             print(f"[INFO] {label}")
+             people += 1
+             cv2.rectangle(orig, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)  # draw a bounding box around each person
+             y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15
+
+             # adds the label text to the image
+             cv2.putText(orig, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)
+
+     # draw the overall person count once, after all boxes have been processed
+     cv2.putText(orig, f"Number of People: {people}", (5, 19), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[1], 2)
+
+     # convert back to RGB so the returned array displays with correct colors (e.g. in Gradio)
+     return cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)
+
+
+ # function to perform object detection in videos
+ def video_detection(video_path):
+     video = cv2.VideoCapture(video_path)
+     frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+     frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+     fps = video.get(cv2.CAP_PROP_FPS) or 10  # fall back to 10 fps if the source does not report a rate
+
+     # the annotated frames are written to an output file whose path is returned at the end
+     out_path = "./data/outpy.mp4"
+     out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (frame_width, frame_height))
+
+     while video.isOpened():
+         ret, frame = video.read()
+         if not ret:  # stop once the video has no more frames
+             break
+         vid = frame.copy()
+         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+         frame = transforms.functional.to_tensor(frame)
+         frame = frame.to(device)
+         vid_detect = model([frame])[0]
+
+         for i in range(0, len(vid_detect["boxes"])):
+             confidence = vid_detect["scores"][i]
+
+             if confidence > 0.75:
+                 idx = int(vid_detect["labels"][i])
+                 box = vid_detect["boxes"][i].detach().cpu().numpy()
+                 (X_1, Y_1, X_2, Y_2) = box.astype("int")
+
+                 label = f"{classes[idx]}, {idx}: {confidence * 100}%"
+                 print(f"[INFO] {label}")
+
+                 cv2.rectangle(vid, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)
+                 y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15
+
+                 cv2.putText(vid, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)
+
+         out.write(vid)  # write the annotated frame to the output video
+
+     video.release()
+     out.release()
+     return out_path  # path of the annotated video, playable by the Gradio video output
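A minimal sketch of using the module directly from a script rather than through Gradio; the input paths are placeholders, and the snippet relies on img_detect returning an RGB array and video_detection returning the path of the annotated video, as defined above:

import cv2
from faster_rcnn import img_detect, video_detection

annotated = img_detect("./data/sample.jpg")  # annotated image as an RGB array
cv2.imwrite("./data/detected_img.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))

out_path = video_detection("./data/sample.mp4")  # path to the annotated video
print(f"Annotated video written to {out_path}")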
readme.md ADDED
@@ -0,0 +1,3 @@
1
+ # Human Detector and Counter in Images and Videos
2
+
3
+ This project uses a pretrained Faster R-CNN (ResNet-50 FPN) model from torchvision to detect and count people in images and videos, served through the Gradio interface defined in deploy.py.