Testys committed on
Commit 59d1adb
1 Parent(s): 7124dcc

Committing obj files

Files changed (4)
  1. deploy.py +44 -0
  2. faster_rcnn.ipynb +899 -0
  3. faster_rcnn.py +108 -0
  4. readme.md +3 -0
deploy.py ADDED
@@ -0,0 +1,44 @@
+ import gradio as gr
+ from faster_rcnn import img_detect, video_detection
+
+
+ # Detection function: routes to the appropriate detector based on the dropdown choice
+ def detect(choice, image_path, video_path):
+     if choice == "Video":
+         return None, video_detection(video_path)
+     return img_detect(image_path), None  # default to image detection
+
+
+ # Build the Gradio interface using the Blocks layout
+ with gr.Blocks(title="Object Detector") as demo:
+     choice = gr.Dropdown(
+         choices=["Image", "Video"],
+         value="Image",
+         label="Would you like to detect objects in an image or a video?",
+     )
+     image_in = gr.Image(type="filepath", label="Image")  # img_detect expects a file path
+     video_in = gr.Video(label="Video")
+     image_out = gr.Image(label="Detected image")
+     video_out = gr.PlayableVideo(label="Detected video")
+     det = gr.Button("Detect")
+     det.click(detect, inputs=[choice, image_in, video_in], outputs=[image_out, video_out])
+
+ if __name__ == "__main__":
+     demo.launch()
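As a quick sanity check, the routing in deploy.py can be exercised without launching the web UI by calling detect directly. This is only a sketch; the sample file paths are placeholders, and it assumes the detect signature defined above:

import deploy

# Image branch: returns an annotated RGB array and no video
img_out, vid_out = deploy.detect("Image", "sample.jpg", None)

# Video branch: returns no image and the path of the annotated video
img_out, vid_out = deploy.detect("Video", None, "sample.mp4")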
faster_rcnn.ipynb ADDED
@@ -0,0 +1,899 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "id": "6oY4rQOq6RfM",
9
+ "pycharm": {
10
+ "is_executing": true
11
+ }
12
+ },
13
+ "outputs": [],
14
+ "source": [
15
+ "import os\n",
16
+ "import cv2\n",
17
+ "import torch\n",
18
+ "from torchvision import transforms\n",
19
+ "from torchvision.models import detection\n",
20
+ "from PIL import Image\n",
21
+ "import pickle\n",
22
+ "import numpy as np\n",
23
+ "import sys\n"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "code",
28
+ "execution_count": null,
29
+ "metadata": {
30
+ "id": "QKAf8q-l6RfX",
31
+ "pycharm": {
32
+ "is_executing": true
33
+ }
34
+ },
35
+ "outputs": [],
36
+ "source": [
37
+ "# checks if their is a gpu present, if not uses a cpu\n",
38
+ "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": 3,
44
+ "metadata": {
45
+ "id": "rYthXSEK6RfZ"
46
+ },
47
+ "outputs": [],
48
+ "source": [
49
+ "# mainly consists of the classes present in the coco dataset\n",
50
+ "classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',\n",
51
+ " 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',\n",
52
+ " 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n",
53
+ " 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',\n",
54
+ " 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',\n",
55
+ " 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',\n",
56
+ " 'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',\n",
57
+ " 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',\n",
58
+ " 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',\n",
59
+ " 'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',\n",
60
+ " 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',\n",
61
+ " 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n",
62
+ "\n",
63
+ "colors = np.random.uniform(0,255, size=(len(classes), 3)) # assigning a color to each classes of the data"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 4,
69
+ "metadata": {
70
+ "colab": {
71
+ "base_uri": "https://localhost:8080/",
72
+ "height": 225,
73
+ "referenced_widgets": [
74
+ "4fe268bf915b44038ba07a91c87a63ec",
75
+ "4e5a3302234341e0ba24b9054b129f99",
76
+ "8b1891821afc478684249870a93c961c",
77
+ "3cb0c74f631f49b4bcc3ffcdae17561f",
78
+ "7ee00c90f28643d697e697130f8e57b9",
79
+ "d02b913a20794bb7bb3c866370dcc7af",
80
+ "c7c1a147321d46f3ae3cffc419a04e2e",
81
+ "ecf3b69a10054324933edcaa322fcbfc",
82
+ "38f2071ea44a4ea991cae2303f70912d",
83
+ "541293c130504c69bb5ccf82780670d1",
84
+ "8ff4b383565e4e8c85c796561bd8a532"
85
+ ]
86
+ },
87
+ "id": "x55IKiUr6Rfc",
88
+ "outputId": "e98c3095-2629-47ac-f6d9-e6640263b111"
89
+ },
90
+ "outputs": [
91
+ {
92
+ "name": "stderr",
93
+ "output_type": "stream",
94
+ "text": [
95
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead.\n",
96
+ " warnings.warn(\n",
97
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1`. You can also use `weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT` to get the most up-to-date weights.\n",
98
+ " warnings.warn(msg)\n",
99
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:208: UserWarning: The parameter 'pretrained_backbone' is deprecated since 0.13 and may be removed in the future, please use 'weights_backbone' instead.\n",
100
+ " warnings.warn(\n",
101
+ "C:\\Users\\Testys\\anaconda3\\lib\\site-packages\\torchvision\\models\\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights_backbone' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights_backbone=ResNet50_Weights.IMAGENET1K_V1`. You can also use `weights_backbone=ResNet50_Weights.DEFAULT` to get the most up-to-date weights.\n",
102
+ " warnings.warn(msg)\n"
103
+ ]
104
+ },
105
+ {
106
+ "data": {
107
+ "text/plain": "FasterRCNN(\n (transform): GeneralizedRCNNTransform(\n Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n Resize(min_size=(800,), max_size=1333, mode='bilinear')\n )\n (backbone): BackboneWithFPN(\n (body): IntermediateLayerGetter(\n (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (relu): ReLU(inplace=True)\n (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n (layer1): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(64, eps=0.0)\n (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(256, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (1): FrozenBatchNorm2d(256, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(64, eps=0.0)\n (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(256, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(64, eps=0.0)\n (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(64, eps=0.0)\n (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(256, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n (layer2): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n (1): FrozenBatchNorm2d(512, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (3): Bottleneck(\n (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(128, eps=0.0)\n (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n 
(bn2): FrozenBatchNorm2d(128, eps=0.0)\n (conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(512, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n (layer3): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n (1): FrozenBatchNorm2d(1024, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (3): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (4): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (5): Bottleneck(\n (conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(256, eps=0.0)\n (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(256, eps=0.0)\n (conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(1024, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n (layer4): Sequential(\n (0): Bottleneck(\n (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(512, eps=0.0)\n (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(512, eps=0.0)\n (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(2048, eps=0.0)\n (relu): ReLU(inplace=True)\n (downsample): Sequential(\n (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n (1): FrozenBatchNorm2d(2048, eps=0.0)\n )\n )\n (1): Bottleneck(\n (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), 
bias=False)\n (bn1): FrozenBatchNorm2d(512, eps=0.0)\n (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(512, eps=0.0)\n (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(2048, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n (2): Bottleneck(\n (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn1): FrozenBatchNorm2d(512, eps=0.0)\n (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n (bn2): FrozenBatchNorm2d(512, eps=0.0)\n (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n (bn3): FrozenBatchNorm2d(2048, eps=0.0)\n (relu): ReLU(inplace=True)\n )\n )\n )\n (fpn): FeaturePyramidNetwork(\n (inner_blocks): ModuleList(\n (0): Conv2dNormActivation(\n (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n (1): Conv2dNormActivation(\n (0): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n (2): Conv2dNormActivation(\n (0): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n (3): Conv2dNormActivation(\n (0): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))\n )\n )\n (layer_blocks): ModuleList(\n (0-3): 4 x Conv2dNormActivation(\n (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n )\n )\n (extra_blocks): LastLevelMaxPool()\n )\n )\n (rpn): RegionProposalNetwork(\n (anchor_generator): AnchorGenerator()\n (head): RPNHead(\n (conv): Sequential(\n (0): Conv2dNormActivation(\n (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))\n (1): ReLU(inplace=True)\n )\n )\n (cls_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))\n (bbox_pred): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))\n )\n )\n (roi_heads): RoIHeads(\n (box_roi_pool): MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'], output_size=(7, 7), sampling_ratio=2)\n (box_head): TwoMLPHead(\n (fc6): Linear(in_features=12544, out_features=1024, bias=True)\n (fc7): Linear(in_features=1024, out_features=1024, bias=True)\n )\n (box_predictor): FastRCNNPredictor(\n (cls_score): Linear(in_features=1024, out_features=91, bias=True)\n (bbox_pred): Linear(in_features=1024, out_features=364, bias=True)\n )\n )\n)"
108
+ },
109
+ "execution_count": 4,
110
+ "metadata": {},
111
+ "output_type": "execute_result"
112
+ }
113
+ ],
114
+ "source": [
115
+ "# calling the Faster-RCNN ResNet50 model\n",
116
+ "model = detection.fasterrcnn_resnet50_fpn(pretrained=True, progress=True, pretrained_backbone=True).to(device)\n",
117
+ "model.eval() # prints out the architecture of the model"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": null,
123
+ "metadata": {
124
+ "colab": {
125
+ "background_save": true
126
+ },
127
+ "id": "-jLNrDyZ6Rfg",
128
+ "pycharm": {
129
+ "is_executing": true
130
+ }
131
+ },
132
+ "outputs": [],
133
+ "source": [
134
+ "image = cv2.imread(os.path.join(\".\",\"data\", \"fam1.HEIC\")) # reads the model using OpenCV\n",
135
+ "\n",
136
+ "image = cv2.resize(image, (640, 480))\n",
137
+ "\n",
138
+ "orig = image.copy()\n",
139
+ "\n",
140
+ "\n",
141
+ "image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # changing the colorspace from BGR to RGB (since Pytorch trains only RGB image)\n",
142
+ "image = image.transpose((2, 0, 1)) # swapping the color channels from channels last to channels first\n",
143
+ "\n",
144
+ "image = np.expand_dims(image, axis=0) # add batch dimension to the image\n",
145
+ "image = image / 255.0 # scaling image from (0,255) to (0,1)\n",
146
+ "image = torch.FloatTensor(image) # changes the numpy array to a tensor.\n",
147
+ "\n",
148
+ "\n",
149
+ "image= image.to(device)\n",
150
+ "# the image is passed to the model to get the bounding boxes\n",
151
+ "detections = model(image)[0]"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 8,
157
+ "metadata": {
158
+ "colab": {
159
+ "background_save": true
160
+ },
161
+ "id": "0FcDqO0l6Rfi",
162
+ "outputId": "0b988da8-7b38-499b-fc3e-2ca7ffdd629f"
163
+ },
164
+ "outputs": [
165
+ {
166
+ "name": "stdout",
167
+ "output_type": "stream",
168
+ "text": [
169
+ "[INFO] person, 1: 98.54804992675781%\n",
170
+ "[INFO] person, 1: 98.00418090820312%\n",
171
+ "[INFO] person, 1: 88.78005981445312%\n",
172
+ "[INFO] person, 1: 80.33210754394531%\n",
173
+ "[INFO] person, 1: 78.6150894165039%\n"
174
+ ]
175
+ },
176
+ {
177
+ "data": {
178
+ "text/plain": "32"
179
+ },
180
+ "execution_count": 8,
181
+ "metadata": {},
182
+ "output_type": "execute_result"
183
+ }
184
+ ],
185
+ "source": [
186
+ "people = 0\n",
187
+ "for i in range(0, len(detections[\"boxes\"])):\n",
188
+ " confidence = detections[\"scores\"][i] # get confidence score of each object in the image\n",
189
+ " idx = int(detections[\"labels\"][i]) # identifying the id of each of the classes in the image\n",
190
+ " box = detections[\"boxes\"][i].detach().cpu().numpy() # gets the coordinates for the bounding boxes\n",
191
+ " (X_1, Y_1, X_2, Y_2) = box.astype(\"int\")\n",
192
+ "\n",
193
+ " if confidence > 0.75 and idx == 1:\n",
194
+ " # matching the label index with its classes and its probability\n",
195
+ " label = f\"{classes[idx]}, {idx}: {confidence* 100}%\"\n",
196
+ " print(f\"[INFO] {label}\")\n",
197
+ " people += 1\n",
198
+ " cv2.rectangle(orig, (X_1, Y_1), (X_2, Y_2), colors[idx], 2) # draw bounding boxes over each object\n",
199
+ " y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15\n",
200
+ "\n",
201
+ " # adds the label text to the image.\n",
202
+ " cv2.putText(orig, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)\n",
203
+ " print(f\"People: {people}\")\n",
204
+ " cv2.putText(orig, f\"Number of People: {people}\", (5, 19), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)\n",
205
+ "\n",
206
+ "cv2.imwrite(\"./data/detected_img.jpg\", orig)\n",
207
+ "cv2.imshow(\"Image Detection\", orig)\n",
208
+ "cv2.waitKey(0)"
209
+ ]
210
+ },
211
+ {
212
+ "cell_type": "code",
213
+ "execution_count": 7,
214
+ "metadata": {
215
+ "colab": {
216
+ "background_save": true
217
+ },
218
+ "id": "Vf0X-Azk6Rfk"
219
+ },
220
+ "outputs": [],
221
+ "source": [
222
+ "# implementation for videos\n",
223
+ "def video_processing(video_path):\n",
224
+ " video = cv2.VideoCapture(video_path)\n",
225
+ " frame_width = int(video.get(3))\n",
226
+ " frame_height = int(video.get(4))\n",
227
+ " \n",
228
+ " # Define the codec and create VideoWriter object.The output is stored in 'outpy.avi' file.\n",
229
+ " out = cv2.VideoWriter('./data/outpy.mp4',cv2.VideoWriter_fourcc('M','J','P','G'), 10, (frame_width,frame_height))\n",
230
+ "\n",
231
+ " while video.isOpened():\n",
232
+ " ret, frame = video.read()\n",
233
+ " vid = frame.copy()\n",
234
+ " if not ret:\n",
235
+ " break\n",
236
+ " frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n",
237
+ " frame = transforms.functional.to_tensor(frame)\n",
238
+ " frame = frame.to(device)\n",
239
+ " vid_detect = model([frame])[0] \n",
240
+ "\n",
241
+ " # return vid, vid_detect\n",
242
+ " for i in range(0, len(vid_detect[\"boxes\"])):\n",
243
+ " confidence = vid_detect[\"scores\"][i]\n",
244
+ "\n",
245
+ " if confidence > 0.75:\n",
246
+ " idx = int(vid_detect[\"labels\"][i])\n",
247
+ " box = vid_detect[\"boxes\"][i].detach().cpu().numpy()\n",
248
+ " (X_1, Y_1, X_2, Y_2) = box.astype(\"int\")\n",
249
+ "\n",
250
+ " label = f\"{classes[idx]}, {idx}: {confidence* 100}%\"\n",
251
+ " print(f\"[INFO] {label}\")\n",
252
+ "\n",
253
+ " cv2.rectangle(vid, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)\n",
254
+ " y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15\n",
255
+ "\n",
256
+ " cv2.putText(vid, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)\n",
257
+ "\n",
258
+ " # cv2.imwrite(\"vid_detection.mp4\", vid)\n",
259
+ " \n",
260
+ " # cv2.imshow(\"Video Detection\", vid)\n",
261
+ " # key = cv2.waitKey(40) & 0xFF\n",
262
+ " # if key == ord(\" \"):\n",
263
+ " # break\n",
264
+ " \n",
265
+ " out.release()\n",
266
+ " cv2.destroyAllWindows()\n"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": null,
272
+ "metadata": {
273
+ "colab": {
274
+ "background_save": true
275
+ },
276
+ "id": "pvzjfhUM6Rfm",
277
+ "pycharm": {
278
+ "is_executing": true
279
+ }
280
+ },
281
+ "outputs": [
282
+ {
283
+ "name": "stdout",
284
+ "output_type": "stream",
285
+ "text": [
286
+ "[INFO] person, 1: 99.96479797363281%\n",
287
+ "[INFO] person, 1: 99.8873291015625%\n",
288
+ "[INFO] tie, 32: 84.8359375%\n",
289
+ "[INFO] skateboard, 41: 84.439697265625%\n",
290
+ "[INFO] person, 1: 99.8567123413086%\n",
291
+ "[INFO] tie, 32: 88.97736358642578%\n",
292
+ "[INFO] person, 1: 99.85549926757812%\n",
293
+ "[INFO] tie, 32: 85.5542221069336%\n",
294
+ "[INFO] person, 1: 99.85945892333984%\n",
295
+ "[INFO] tie, 32: 76.01469421386719%\n",
296
+ "[INFO] person, 1: 99.91976928710938%\n",
297
+ "[INFO] tie, 32: 75.80422973632812%\n",
298
+ "[INFO] person, 1: 99.92200469970703%\n",
299
+ "[INFO] person, 1: 99.91909790039062%\n",
300
+ "[INFO] person, 1: 99.87517547607422%\n",
301
+ "[INFO] person, 1: 99.95360565185547%\n",
302
+ "[INFO] person, 1: 99.94219207763672%\n",
303
+ "[INFO] person, 1: 99.911865234375%\n",
304
+ "[INFO] person, 1: 99.9067153930664%\n",
305
+ "[INFO] person, 1: 99.92170715332031%\n",
306
+ "[INFO] person, 1: 99.92486572265625%\n",
307
+ "[INFO] tie, 32: 81.37924194335938%\n",
308
+ "[INFO] person, 1: 99.9447021484375%\n",
309
+ "[INFO] person, 1: 99.90923309326172%\n",
310
+ "[INFO] person, 1: 99.9104232788086%\n",
311
+ "[INFO] person, 1: 99.9041976928711%\n",
312
+ "[INFO] person, 1: 99.92522430419922%\n",
313
+ "[INFO] person, 1: 99.90491485595703%\n",
314
+ "[INFO] person, 1: 99.91775512695312%\n",
315
+ "[INFO] person, 1: 99.91704559326172%\n",
316
+ "[INFO] person, 1: 99.8878173828125%\n",
317
+ "[INFO] person, 1: 99.89239501953125%\n",
318
+ "[INFO] person, 1: 99.89933013916016%\n",
319
+ "[INFO] tie, 32: 77.39177703857422%\n",
320
+ "[INFO] person, 1: 99.92431640625%\n",
321
+ "[INFO] tie, 32: 86.86376190185547%\n",
322
+ "[INFO] person, 1: 99.9134750366211%\n",
323
+ "[INFO] tie, 32: 90.91716766357422%\n",
324
+ "[INFO] person, 1: 99.86553192138672%\n",
325
+ "[INFO] tie, 32: 83.31379699707031%\n",
326
+ "[INFO] person, 1: 99.89295959472656%\n",
327
+ "[INFO] tie, 32: 82.80715942382812%\n",
328
+ "[INFO] person, 1: 99.93063354492188%\n",
329
+ "[INFO] tie, 32: 84.20972442626953%\n",
330
+ "[INFO] person, 1: 99.90483856201172%\n",
331
+ "[INFO] tie, 32: 86.09645080566406%\n",
332
+ "[INFO] person, 1: 99.91349792480469%\n",
333
+ "[INFO] tie, 32: 86.98382568359375%\n",
334
+ "[INFO] person, 1: 99.92369079589844%\n",
335
+ "[INFO] tie, 32: 80.03042602539062%\n",
336
+ "[INFO] person, 1: 99.92707061767578%\n",
337
+ "[INFO] tie, 32: 91.95155334472656%\n",
338
+ "[INFO] person, 1: 99.92926025390625%\n",
339
+ "[INFO] tie, 32: 88.84310150146484%\n",
340
+ "[INFO] person, 1: 99.93949890136719%\n",
341
+ "[INFO] tie, 32: 92.20435333251953%\n",
342
+ "[INFO] person, 1: 99.94140625%\n",
343
+ "[INFO] tie, 32: 91.74589538574219%\n",
344
+ "[INFO] person, 1: 99.87340545654297%\n",
345
+ "[INFO] person, 1: 99.90251159667969%\n",
346
+ "[INFO] tie, 32: 77.56298065185547%\n",
347
+ "[INFO] person, 1: 99.92112731933594%\n",
348
+ "[INFO] tie, 32: 88.31008911132812%\n",
349
+ "[INFO] person, 1: 99.84620666503906%\n",
350
+ "[INFO] person, 1: 99.82284545898438%\n",
351
+ "[INFO] person, 1: 75.87650299072266%\n",
352
+ "[INFO] person, 1: 99.90447235107422%\n",
353
+ "[INFO] person, 1: 99.90357971191406%\n",
354
+ "[INFO] tie, 32: 87.86799621582031%\n",
355
+ "[INFO] person, 1: 99.89325714111328%\n",
356
+ "[INFO] person, 1: 99.89271545410156%\n",
357
+ "[INFO] tie, 32: 85.549560546875%\n",
358
+ "[INFO] person, 1: 99.90028381347656%\n",
359
+ "[INFO] tie, 32: 89.2087173461914%\n",
360
+ "[INFO] person, 1: 99.81275177001953%\n",
361
+ "[INFO] tie, 32: 92.40033721923828%\n",
362
+ "[INFO] person, 1: 99.83216857910156%\n",
363
+ "[INFO] tie, 32: 90.578857421875%\n",
364
+ "[INFO] person, 1: 99.80609893798828%\n",
365
+ "[INFO] skateboard, 41: 96.36973571777344%\n",
366
+ "[INFO] tie, 32: 96.11731719970703%\n",
367
+ "[INFO] skateboard, 41: 82.39974212646484%\n",
368
+ "[INFO] person, 1: 99.69965362548828%\n",
369
+ "[INFO] tie, 32: 95.91572570800781%\n",
370
+ "[INFO] person, 1: 99.69015502929688%\n",
371
+ "[INFO] tie, 32: 96.45115661621094%\n",
372
+ "[INFO] person, 1: 99.815673828125%\n",
373
+ "[INFO] tie, 32: 96.90648651123047%\n",
374
+ "[INFO] person, 1: 99.90100860595703%\n",
375
+ "[INFO] tie, 32: 85.73446655273438%\n",
376
+ "[INFO] person, 1: 99.87108612060547%\n",
377
+ "[INFO] tie, 32: 83.69398498535156%\n",
378
+ "[INFO] person, 1: 99.92547607421875%\n",
379
+ "[INFO] tie, 32: 81.19554138183594%\n",
380
+ "[INFO] person, 1: 99.88804626464844%\n",
381
+ "[INFO] person, 1: 99.8199462890625%\n",
382
+ "[INFO] tie, 32: 82.75387573242188%\n",
383
+ "[INFO] person, 1: 99.88672637939453%\n",
384
+ "[INFO] tie, 32: 88.2742919921875%\n",
385
+ "[INFO] person, 1: 99.87617492675781%\n",
386
+ "[INFO] tie, 32: 97.07515716552734%\n",
387
+ "[INFO] person, 1: 99.87181091308594%\n",
388
+ "[INFO] tie, 32: 98.3101577758789%\n",
389
+ "[INFO] person, 1: 99.88245391845703%\n",
390
+ "[INFO] tie, 32: 97.0067138671875%\n",
391
+ "[INFO] person, 1: 99.80060577392578%\n",
392
+ "[INFO] tie, 32: 97.07807922363281%\n",
393
+ "[INFO] person, 1: 99.88915252685547%\n",
394
+ "[INFO] tie, 32: 92.22811126708984%\n",
395
+ "[INFO] person, 1: 99.92195129394531%\n",
396
+ "[INFO] tie, 32: 89.54218292236328%\n",
397
+ "[INFO] person, 1: 99.9444580078125%\n",
398
+ "[INFO] tie, 32: 88.38265228271484%\n",
399
+ "[INFO] person, 1: 99.93598937988281%\n",
400
+ "[INFO] tie, 32: 85.4147720336914%\n",
401
+ "[INFO] person, 1: 99.80413055419922%\n",
402
+ "[INFO] person, 1: 99.83075714111328%\n",
403
+ "[INFO] person, 1: 99.8851089477539%\n",
404
+ "[INFO] tie, 32: 83.0019760131836%\n",
405
+ "[INFO] person, 1: 99.87308502197266%\n",
406
+ "[INFO] tie, 32: 87.57421112060547%\n",
407
+ "[INFO] person, 1: 99.92283630371094%\n",
408
+ "[INFO] tie, 32: 86.23603820800781%\n",
409
+ "[INFO] person, 1: 99.90347290039062%\n",
410
+ "[INFO] tie, 32: 75.47425842285156%\n",
411
+ "[INFO] person, 1: 99.849853515625%\n",
412
+ "[INFO] person, 1: 99.78257751464844%\n",
413
+ "[INFO] tie, 32: 84.9027328491211%\n",
414
+ "[INFO] person, 1: 99.9006118774414%\n",
415
+ "[INFO] tie, 32: 81.21675872802734%\n",
416
+ "[INFO] person, 1: 99.89569091796875%\n",
417
+ "[INFO] tie, 32: 83.1388931274414%\n",
418
+ "[INFO] person, 1: 99.8825454711914%\n",
419
+ "[INFO] tie, 32: 84.85138702392578%\n",
420
+ "[INFO] person, 1: 99.90423583984375%\n",
421
+ "[INFO] person, 1: 99.91820526123047%\n",
422
+ "[INFO] person, 1: 99.90950775146484%\n",
423
+ "[INFO] person, 1: 99.91889953613281%\n",
424
+ "[INFO] person, 1: 99.88500213623047%\n",
425
+ "[INFO] tie, 32: 85.9897689819336%\n",
426
+ "[INFO] person, 1: 99.83943176269531%\n",
427
+ "[INFO] person, 1: 99.72062683105469%\n",
428
+ "[INFO] skateboard, 41: 92.7118148803711%\n",
429
+ "[INFO] person, 1: 99.57286834716797%\n",
430
+ "[INFO] skateboard, 41: 94.51634216308594%\n",
431
+ "[INFO] person, 1: 99.65888214111328%\n",
432
+ "[INFO] skateboard, 41: 93.3429946899414%\n",
433
+ "[INFO] person, 1: 99.45445251464844%\n",
434
+ "[INFO] skateboard, 41: 87.95608520507812%\n",
435
+ "[INFO] person, 1: 99.67414855957031%\n",
436
+ "[INFO] tie, 32: 89.96955108642578%\n",
437
+ "[INFO] skateboard, 41: 79.02975463867188%\n",
438
+ "[INFO] person, 1: 99.56875610351562%\n",
439
+ "[INFO] person, 1: 99.49411010742188%\n",
440
+ "[INFO] person, 1: 99.68108367919922%\n",
441
+ "[INFO] person, 1: 99.48446655273438%\n",
442
+ "[INFO] person, 1: 99.75424194335938%\n",
443
+ "[INFO] person, 1: 99.55342102050781%\n",
444
+ "[INFO] person, 1: 99.61231231689453%\n",
445
+ "[INFO] person, 1: 99.55381774902344%\n",
446
+ "[INFO] person, 1: 99.65033721923828%\n",
447
+ "[INFO] person, 1: 99.75348663330078%\n",
448
+ "[INFO] person, 1: 99.45124053955078%\n",
449
+ "[INFO] person, 1: 99.50060272216797%\n",
450
+ "[INFO] person, 1: 99.36222076416016%\n",
451
+ "[INFO] person, 1: 99.62764739990234%\n",
452
+ "[INFO] person, 1: 99.55170440673828%\n",
453
+ "[INFO] person, 1: 99.5832290649414%\n",
454
+ "[INFO] person, 1: 99.52233123779297%\n",
455
+ "[INFO] person, 1: 99.27761840820312%\n",
456
+ "[INFO] person, 1: 78.00102233886719%\n",
457
+ "[INFO] person, 1: 99.5089111328125%\n",
458
+ "[INFO] person, 1: 99.4920883178711%\n",
459
+ "[INFO] person, 1: 99.27015686035156%\n"
460
+ ]
461
+ }
462
+ ],
463
+ "source": [
464
+ "video_processing(\"./data/a1.mp4\")"
465
+ ]
466
+ },
467
+ {
468
+ "cell_type": "code",
469
+ "execution_count": null,
470
+ "metadata": {
471
+ "id": "_cKB9rwY6Rfo",
472
+ "pycharm": {
473
+ "is_executing": true
474
+ }
475
+ },
476
+ "outputs": [],
477
+ "source": [
478
+ "vid= cv2.VideoCapture(\"ai.mp4\")\n",
479
+ "ret = True\n",
480
+ "while ret:\n",
481
+ " ret, frame = vid.read()\n",
482
+ "\n",
483
+ " if ret:\n",
484
+ " cv2.imshow(\"Video Window\", frame)\n",
485
+ " cv2.waitKey(40)\n",
486
+ "\n",
487
+ "# vid.release()\n",
488
+ "# cv2.destroyAllWindows()\n"
489
+ ]
490
+ },
491
+ {
492
+ "cell_type": "code",
493
+ "execution_count": 2,
494
+ "metadata": {
495
+ "id": "OHtxLb7kg8tf"
496
+ },
497
+ "outputs": [],
498
+ "source": []
499
+ },
500
+ {
501
+ "cell_type": "code",
502
+ "execution_count": null,
503
+ "outputs": [],
504
+ "source": [],
505
+ "metadata": {
506
+ "collapsed": false,
507
+ "pycharm": {
508
+ "name": "#%%\n",
509
+ "is_executing": true
510
+ }
511
+ }
512
+ },
513
+ {
514
+ "cell_type": "code",
515
+ "execution_count": null,
516
+ "outputs": [],
517
+ "source": [],
518
+ "metadata": {
519
+ "collapsed": false,
520
+ "pycharm": {
521
+ "name": "#%%\n",
522
+ "is_executing": true
523
+ }
524
+ }
525
+ }
526
+ ],
527
+ "metadata": {
528
+ "accelerator": "GPU",
529
+ "colab": {
530
+ "provenance": []
531
+ },
532
+ "gpuClass": "standard",
533
+ "kernelspec": {
534
+ "display_name": "Python 3",
535
+ "language": "python",
536
+ "name": "python3"
537
+ },
538
+ "language_info": {
539
+ "codemirror_mode": {
540
+ "name": "ipython",
541
+ "version": 2
542
+ },
543
+ "file_extension": ".py",
544
+ "mimetype": "text/x-python",
545
+ "name": "python",
546
+ "nbconvert_exporter": "python",
547
+ "pygments_lexer": "ipython2",
548
+ "version": "2.7.6"
549
+ },
550
+ "widgets": {
551
+ "application/vnd.jupyter.widget-state+json": {
552
+ "38f2071ea44a4ea991cae2303f70912d": {
553
+ "model_module": "@jupyter-widgets/controls",
554
+ "model_module_version": "1.5.0",
555
+ "model_name": "ProgressStyleModel",
556
+ "state": {
557
+ "_model_module": "@jupyter-widgets/controls",
558
+ "_model_module_version": "1.5.0",
559
+ "_model_name": "ProgressStyleModel",
560
+ "_view_count": null,
561
+ "_view_module": "@jupyter-widgets/base",
562
+ "_view_module_version": "1.2.0",
563
+ "_view_name": "StyleView",
564
+ "bar_color": null,
565
+ "description_width": ""
566
+ }
567
+ },
568
+ "3cb0c74f631f49b4bcc3ffcdae17561f": {
569
+ "model_module": "@jupyter-widgets/controls",
570
+ "model_module_version": "1.5.0",
571
+ "model_name": "HTMLModel",
572
+ "state": {
573
+ "_dom_classes": [],
574
+ "_model_module": "@jupyter-widgets/controls",
575
+ "_model_module_version": "1.5.0",
576
+ "_model_name": "HTMLModel",
577
+ "_view_count": null,
578
+ "_view_module": "@jupyter-widgets/controls",
579
+ "_view_module_version": "1.5.0",
580
+ "_view_name": "HTMLView",
581
+ "description": "",
582
+ "description_tooltip": null,
583
+ "layout": "IPY_MODEL_541293c130504c69bb5ccf82780670d1",
584
+ "placeholder": "​",
585
+ "style": "IPY_MODEL_8ff4b383565e4e8c85c796561bd8a532",
586
+ "value": " 160M/160M [00:02<00:00, 73.2MB/s]"
587
+ }
588
+ },
589
+ "4e5a3302234341e0ba24b9054b129f99": {
590
+ "model_module": "@jupyter-widgets/controls",
591
+ "model_module_version": "1.5.0",
592
+ "model_name": "HTMLModel",
593
+ "state": {
594
+ "_dom_classes": [],
595
+ "_model_module": "@jupyter-widgets/controls",
596
+ "_model_module_version": "1.5.0",
597
+ "_model_name": "HTMLModel",
598
+ "_view_count": null,
599
+ "_view_module": "@jupyter-widgets/controls",
600
+ "_view_module_version": "1.5.0",
601
+ "_view_name": "HTMLView",
602
+ "description": "",
603
+ "description_tooltip": null,
604
+ "layout": "IPY_MODEL_d02b913a20794bb7bb3c866370dcc7af",
605
+ "placeholder": "​",
606
+ "style": "IPY_MODEL_c7c1a147321d46f3ae3cffc419a04e2e",
607
+ "value": "100%"
608
+ }
609
+ },
610
+ "4fe268bf915b44038ba07a91c87a63ec": {
611
+ "model_module": "@jupyter-widgets/controls",
612
+ "model_module_version": "1.5.0",
613
+ "model_name": "HBoxModel",
614
+ "state": {
615
+ "_dom_classes": [],
616
+ "_model_module": "@jupyter-widgets/controls",
617
+ "_model_module_version": "1.5.0",
618
+ "_model_name": "HBoxModel",
619
+ "_view_count": null,
620
+ "_view_module": "@jupyter-widgets/controls",
621
+ "_view_module_version": "1.5.0",
622
+ "_view_name": "HBoxView",
623
+ "box_style": "",
624
+ "children": [
625
+ "IPY_MODEL_4e5a3302234341e0ba24b9054b129f99",
626
+ "IPY_MODEL_8b1891821afc478684249870a93c961c",
627
+ "IPY_MODEL_3cb0c74f631f49b4bcc3ffcdae17561f"
628
+ ],
629
+ "layout": "IPY_MODEL_7ee00c90f28643d697e697130f8e57b9"
630
+ }
631
+ },
632
+ "541293c130504c69bb5ccf82780670d1": {
633
+ "model_module": "@jupyter-widgets/base",
634
+ "model_module_version": "1.2.0",
635
+ "model_name": "LayoutModel",
636
+ "state": {
637
+ "_model_module": "@jupyter-widgets/base",
638
+ "_model_module_version": "1.2.0",
639
+ "_model_name": "LayoutModel",
640
+ "_view_count": null,
641
+ "_view_module": "@jupyter-widgets/base",
642
+ "_view_module_version": "1.2.0",
643
+ "_view_name": "LayoutView",
644
+ "align_content": null,
645
+ "align_items": null,
646
+ "align_self": null,
647
+ "border": null,
648
+ "bottom": null,
649
+ "display": null,
650
+ "flex": null,
651
+ "flex_flow": null,
652
+ "grid_area": null,
653
+ "grid_auto_columns": null,
654
+ "grid_auto_flow": null,
655
+ "grid_auto_rows": null,
656
+ "grid_column": null,
657
+ "grid_gap": null,
658
+ "grid_row": null,
659
+ "grid_template_areas": null,
660
+ "grid_template_columns": null,
661
+ "grid_template_rows": null,
662
+ "height": null,
663
+ "justify_content": null,
664
+ "justify_items": null,
665
+ "left": null,
666
+ "margin": null,
667
+ "max_height": null,
668
+ "max_width": null,
669
+ "min_height": null,
670
+ "min_width": null,
671
+ "object_fit": null,
672
+ "object_position": null,
673
+ "order": null,
674
+ "overflow": null,
675
+ "overflow_x": null,
676
+ "overflow_y": null,
677
+ "padding": null,
678
+ "right": null,
679
+ "top": null,
680
+ "visibility": null,
681
+ "width": null
682
+ }
683
+ },
684
+ "7ee00c90f28643d697e697130f8e57b9": {
685
+ "model_module": "@jupyter-widgets/base",
686
+ "model_module_version": "1.2.0",
687
+ "model_name": "LayoutModel",
688
+ "state": {
689
+ "_model_module": "@jupyter-widgets/base",
690
+ "_model_module_version": "1.2.0",
691
+ "_model_name": "LayoutModel",
692
+ "_view_count": null,
693
+ "_view_module": "@jupyter-widgets/base",
694
+ "_view_module_version": "1.2.0",
695
+ "_view_name": "LayoutView",
696
+ "align_content": null,
697
+ "align_items": null,
698
+ "align_self": null,
699
+ "border": null,
700
+ "bottom": null,
701
+ "display": null,
702
+ "flex": null,
703
+ "flex_flow": null,
704
+ "grid_area": null,
705
+ "grid_auto_columns": null,
706
+ "grid_auto_flow": null,
707
+ "grid_auto_rows": null,
708
+ "grid_column": null,
709
+ "grid_gap": null,
710
+ "grid_row": null,
711
+ "grid_template_areas": null,
712
+ "grid_template_columns": null,
713
+ "grid_template_rows": null,
714
+ "height": null,
715
+ "justify_content": null,
716
+ "justify_items": null,
717
+ "left": null,
718
+ "margin": null,
719
+ "max_height": null,
720
+ "max_width": null,
721
+ "min_height": null,
722
+ "min_width": null,
723
+ "object_fit": null,
724
+ "object_position": null,
725
+ "order": null,
726
+ "overflow": null,
727
+ "overflow_x": null,
728
+ "overflow_y": null,
729
+ "padding": null,
730
+ "right": null,
731
+ "top": null,
732
+ "visibility": null,
733
+ "width": null
734
+ }
735
+ },
736
+ "8b1891821afc478684249870a93c961c": {
737
+ "model_module": "@jupyter-widgets/controls",
738
+ "model_module_version": "1.5.0",
739
+ "model_name": "FloatProgressModel",
740
+ "state": {
741
+ "_dom_classes": [],
742
+ "_model_module": "@jupyter-widgets/controls",
743
+ "_model_module_version": "1.5.0",
744
+ "_model_name": "FloatProgressModel",
745
+ "_view_count": null,
746
+ "_view_module": "@jupyter-widgets/controls",
747
+ "_view_module_version": "1.5.0",
748
+ "_view_name": "ProgressView",
749
+ "bar_style": "success",
750
+ "description": "",
751
+ "description_tooltip": null,
752
+ "layout": "IPY_MODEL_ecf3b69a10054324933edcaa322fcbfc",
753
+ "max": 167502836,
754
+ "min": 0,
755
+ "orientation": "horizontal",
756
+ "style": "IPY_MODEL_38f2071ea44a4ea991cae2303f70912d",
757
+ "value": 167502836
758
+ }
759
+ },
760
+ "8ff4b383565e4e8c85c796561bd8a532": {
761
+ "model_module": "@jupyter-widgets/controls",
762
+ "model_module_version": "1.5.0",
763
+ "model_name": "DescriptionStyleModel",
764
+ "state": {
765
+ "_model_module": "@jupyter-widgets/controls",
766
+ "_model_module_version": "1.5.0",
767
+ "_model_name": "DescriptionStyleModel",
768
+ "_view_count": null,
769
+ "_view_module": "@jupyter-widgets/base",
770
+ "_view_module_version": "1.2.0",
771
+ "_view_name": "StyleView",
772
+ "description_width": ""
773
+ }
774
+ },
775
+ "c7c1a147321d46f3ae3cffc419a04e2e": {
776
+ "model_module": "@jupyter-widgets/controls",
777
+ "model_module_version": "1.5.0",
778
+ "model_name": "DescriptionStyleModel",
779
+ "state": {
780
+ "_model_module": "@jupyter-widgets/controls",
781
+ "_model_module_version": "1.5.0",
782
+ "_model_name": "DescriptionStyleModel",
783
+ "_view_count": null,
784
+ "_view_module": "@jupyter-widgets/base",
785
+ "_view_module_version": "1.2.0",
786
+ "_view_name": "StyleView",
787
+ "description_width": ""
788
+ }
789
+ },
790
+ "d02b913a20794bb7bb3c866370dcc7af": {
791
+ "model_module": "@jupyter-widgets/base",
792
+ "model_module_version": "1.2.0",
793
+ "model_name": "LayoutModel",
794
+ "state": {
795
+ "_model_module": "@jupyter-widgets/base",
796
+ "_model_module_version": "1.2.0",
797
+ "_model_name": "LayoutModel",
798
+ "_view_count": null,
799
+ "_view_module": "@jupyter-widgets/base",
800
+ "_view_module_version": "1.2.0",
801
+ "_view_name": "LayoutView",
802
+ "align_content": null,
803
+ "align_items": null,
804
+ "align_self": null,
805
+ "border": null,
806
+ "bottom": null,
807
+ "display": null,
808
+ "flex": null,
809
+ "flex_flow": null,
810
+ "grid_area": null,
811
+ "grid_auto_columns": null,
812
+ "grid_auto_flow": null,
813
+ "grid_auto_rows": null,
814
+ "grid_column": null,
815
+ "grid_gap": null,
816
+ "grid_row": null,
817
+ "grid_template_areas": null,
818
+ "grid_template_columns": null,
819
+ "grid_template_rows": null,
820
+ "height": null,
821
+ "justify_content": null,
822
+ "justify_items": null,
823
+ "left": null,
824
+ "margin": null,
825
+ "max_height": null,
826
+ "max_width": null,
827
+ "min_height": null,
828
+ "min_width": null,
829
+ "object_fit": null,
830
+ "object_position": null,
831
+ "order": null,
832
+ "overflow": null,
833
+ "overflow_x": null,
834
+ "overflow_y": null,
835
+ "padding": null,
836
+ "right": null,
837
+ "top": null,
838
+ "visibility": null,
839
+ "width": null
840
+ }
841
+ },
842
+ "ecf3b69a10054324933edcaa322fcbfc": {
843
+ "model_module": "@jupyter-widgets/base",
844
+ "model_module_version": "1.2.0",
845
+ "model_name": "LayoutModel",
846
+ "state": {
847
+ "_model_module": "@jupyter-widgets/base",
848
+ "_model_module_version": "1.2.0",
849
+ "_model_name": "LayoutModel",
850
+ "_view_count": null,
851
+ "_view_module": "@jupyter-widgets/base",
852
+ "_view_module_version": "1.2.0",
853
+ "_view_name": "LayoutView",
854
+ "align_content": null,
855
+ "align_items": null,
856
+ "align_self": null,
857
+ "border": null,
858
+ "bottom": null,
859
+ "display": null,
860
+ "flex": null,
861
+ "flex_flow": null,
862
+ "grid_area": null,
863
+ "grid_auto_columns": null,
864
+ "grid_auto_flow": null,
865
+ "grid_auto_rows": null,
866
+ "grid_column": null,
867
+ "grid_gap": null,
868
+ "grid_row": null,
869
+ "grid_template_areas": null,
870
+ "grid_template_columns": null,
871
+ "grid_template_rows": null,
872
+ "height": null,
873
+ "justify_content": null,
874
+ "justify_items": null,
875
+ "left": null,
876
+ "margin": null,
877
+ "max_height": null,
878
+ "max_width": null,
879
+ "min_height": null,
880
+ "min_width": null,
881
+ "object_fit": null,
882
+ "object_position": null,
883
+ "order": null,
884
+ "overflow": null,
885
+ "overflow_x": null,
886
+ "overflow_y": null,
887
+ "padding": null,
888
+ "right": null,
889
+ "top": null,
890
+ "visibility": null,
891
+ "width": null
892
+ }
893
+ }
894
+ }
895
+ }
896
+ },
897
+ "nbformat": 4,
898
+ "nbformat_minor": 0
899
+ }
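The notebook repeats the same preprocessing steps before each model call (resize, BGR-to-RGB conversion, channels-first layout, batch dimension, scaling to 0-1). A small helper in this spirit could bundle those steps; the function name and defaults below are illustrative and not part of the committed code:

import cv2
import numpy as np
import torch


def preprocess(bgr_image, size=(640, 480), device="cpu"):
    # resize, convert BGR to RGB, move channels first, add a batch dim, scale to [0, 1]
    image = cv2.resize(bgr_image, size)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))
    image = np.expand_dims(image, axis=0) / 255.0
    return torch.FloatTensor(image).to(device)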
faster_rcnn.py ADDED
@@ -0,0 +1,108 @@
+ # importing modules
+ import cv2
+ import torch
+ from torchvision import transforms
+ from torchvision.models import detection
+ import numpy as np
+
+ # checks if there is a GPU present; if not, uses the CPU
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ # the class labels of the COCO dataset ('N/A' marks unused ids)
+ classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
+            'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
+            'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+            'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
+            'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
+            'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
+            'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
+            'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
+            'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
+            'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+            'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
+            'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
+
+ colors = np.random.uniform(0, 255, size=(len(classes), 3))  # assigning a random color to each class
+
+
+ # loading the pretrained Faster R-CNN ResNet50-FPN v2 model
+ model = detection.fasterrcnn_resnet50_fpn_v2(weights="DEFAULT", progress=True).to(device)
+ model.eval()  # switch the model to inference mode
+
+
+ # function to carry out object detection on images
+ def img_detect(img_path):
+     image = cv2.imread(img_path)  # reads the image using OpenCV
+     image = cv2.resize(image, (640, 480))
+     orig = image.copy()
+
+     # changing the colorspace from BGR to RGB (the pretrained model expects RGB input)
+     image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+     image = image.transpose((2, 0, 1))  # swapping the color channels from channels-last to channels-first
+
+     image = np.expand_dims(image, axis=0)  # add a batch dimension to the image
+     image = image / 255.0  # scaling the image from (0, 255) to (0, 1)
+     image = torch.FloatTensor(image)  # changes the numpy array to a tensor
+
+     image = image.to(device)
+     detections = model(image)[0]  # the image is passed to the model to get the bounding boxes
+
+     people = 0
+     # loop to draw bounding boxes on the image
+     for i in range(0, len(detections["boxes"])):
+         confidence = detections["scores"][i]  # confidence score of each detected object
+         idx = int(detections["labels"][i])  # class id of the detection
+         box = detections["boxes"][i].detach().cpu().numpy()  # coordinates of the bounding box
+         (X_1, Y_1, X_2, Y_2) = box.astype("int")
+
+         if confidence > 0.75 and idx == 1:
+             # matching the label index with its class and its probability
+             label = f"{classes[idx]}, {idx}: {confidence * 100}%"
+             print(f"[INFO] {label}")
+             people += 1
+             cv2.rectangle(orig, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)  # draw a bounding box around each person
+             y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15
+
+             # adds the label text to the image
+             cv2.putText(orig, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)
+
+     # draw the overall person count once, after all boxes have been processed
+     cv2.putText(orig, f"Number of People: {people}", (5, 19), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[1], 2)
+
+     # convert back to RGB so the returned array displays with correct colors (e.g. in Gradio)
+     return cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)
+
+
+ # function to perform object detection in videos
+ def video_detection(video_path):
+     video = cv2.VideoCapture(video_path)
+     frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
+     frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
+     fps = video.get(cv2.CAP_PROP_FPS) or 10  # fall back to 10 fps if the source does not report a rate
+
+     # the annotated frames are written to an output file whose path is returned at the end
+     out_path = "./data/outpy.mp4"
+     out = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (frame_width, frame_height))
+
+     while video.isOpened():
+         ret, frame = video.read()
+         if not ret:  # stop once the video has no more frames
+             break
+         vid = frame.copy()
+         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+         frame = transforms.functional.to_tensor(frame)
+         frame = frame.to(device)
+         vid_detect = model([frame])[0]
+
+         for i in range(0, len(vid_detect["boxes"])):
+             confidence = vid_detect["scores"][i]
+
+             if confidence > 0.75:
+                 idx = int(vid_detect["labels"][i])
+                 box = vid_detect["boxes"][i].detach().cpu().numpy()
+                 (X_1, Y_1, X_2, Y_2) = box.astype("int")
+
+                 label = f"{classes[idx]}, {idx}: {confidence * 100}%"
+                 print(f"[INFO] {label}")
+
+                 cv2.rectangle(vid, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)
+                 y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15
+
+                 cv2.putText(vid, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)
+
+         out.write(vid)  # write the annotated frame to the output video
+
+     video.release()
+     out.release()
+     return out_path  # path of the annotated video, playable by the Gradio video output
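A minimal sketch of using the module directly from a script rather than through Gradio; the input paths are placeholders, and the snippet relies on img_detect returning an RGB array and video_detection returning the path of the annotated video, as defined above:

import cv2
from faster_rcnn import img_detect, video_detection

annotated = img_detect("./data/sample.jpg")  # annotated image as an RGB array
cv2.imwrite("./data/detected_img.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))

out_path = video_detection("./data/sample.mp4")  # path to the annotated video
print(f"Annotated video written to {out_path}")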
readme.md ADDED
@@ -0,0 +1,3 @@
1
+ # Human Detector and Counter in Images and Videos
2
+
3
+ This project uses a pretrained Faster R-CNN (ResNet-50 FPN) model from torchvision to detect and count people in images and videos, served through the Gradio interface defined in deploy.py.