Spaces:
Runtime error
Committing obj files
- deploy.py +44 -0
- faster_rcnn.ipynb +899 -0
- faster_rcnn.py +108 -0
- readme.md +3 -0
deploy.py
ADDED
@@ -0,0 +1,44 @@
import gradio as gr

from faster_rcnn import img_detect, video_detection


# Dispatch to the matching detection function based on the dropdown choice.
def detect(choice, image_path, video_path):
    if choice == "Video":
        return None, video_detection(video_path)
    # Default to image detection.
    return img_detect(image_path), None


# Build the interface with the Blocks layout.
with gr.Blocks(title="Object Detector") as demo:
    gr.Markdown("# Object Detector")
    choice = gr.Dropdown(
        choices=["Image", "Video"],
        value="Image",
        label="What type of object would you like to detect?",
    )
    image_in = gr.Image(type="filepath", label="Image input")
    video_in = gr.Video(label="Video input")
    image_out = gr.Image(label="Image detections")
    video_out = gr.Video(label="Video detections")
    det = gr.Button("Detect")
    det.click(detect, inputs=[choice, image_in, video_in], outputs=[image_out, video_out])

if __name__ == "__main__":
    demo.launch()
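For the Space to build, a requirements file listing the Python dependencies must sit alongside deploy.py; a minimal unpinned sketch (this list is an assumption, not a file from this commit):

# requirements.txt (assumed, not part of this commit)
gradio
torch
torchvision
opencv-python-headless
numpy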
faster_rcnn.ipynb
ADDED
@@ -0,0 +1,899 @@
(The notebook is shown below as its code cells and key outputs; the raw JSON scaffolding, cell metadata, and ipywidgets state are condensed.)

# %% Cell 1: imports
import os
import cv2
import torch
from torchvision import transforms
from torchvision.models import detection
from PIL import Image
import pickle
import numpy as np
import sys

# %% Cell 2: device selection
# Checks if there is a GPU present; if not, uses the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# %% Cell 3: COCO class labels
# The class names from the COCO dataset, in label-index order.
classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
           'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
           'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
           'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
           'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
           'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
           'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
           'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
           'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

colors = np.random.uniform(0, 255, size=(len(classes), 3))  # assigning a random color to each class
# %% Cell 4: load the pretrained Faster R-CNN ResNet-50 FPN model
model = detection.fasterrcnn_resnet50_fpn(pretrained=True, progress=True, pretrained_backbone=True).to(device)
model.eval()  # switches to inference mode and prints the architecture of the model

Output (abridged): torchvision warns that 'pretrained' and 'pretrained_backbone' are deprecated since 0.13 in favour of 'weights' and 'weights_backbone' (e.g. weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT), then prints the full FasterRCNN module tree: GeneralizedRCNNTransform, a ResNet-50 BackboneWithFPN, a RegionProposalNetwork, and RoIHeads ending in a 91-class FastRCNNPredictor.
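The deprecation warnings above spell out the replacement API; under torchvision >= 0.13 the same COCO weights can be requested explicitly (a sketch, not part of the committed notebook):

from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

# Same pretrained COCO weights, via the non-deprecated 'weights' argument.
model = detection.fasterrcnn_resnet50_fpn(
    weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT, progress=True
).to(device)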
# %% Cell 5: read and preprocess a test image, then run detection
image = cv2.imread(os.path.join(".", "data", "fam1.HEIC"))  # reads the image using OpenCV

image = cv2.resize(image, (640, 480))

orig = image.copy()

image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; the model expects RGB
image = image.transpose((2, 0, 1))  # swap the color channels from channels-last (HWC) to channels-first (CHW)

image = np.expand_dims(image, axis=0)  # add a batch dimension to the image
image = image / 255.0  # scale pixel values from (0, 255) to (0, 1)
image = torch.FloatTensor(image)  # convert the NumPy array to a tensor

image = image.to(device)
# the image is passed to the model to get the bounding boxes
detections = model(image)[0]
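The manual transpose/expand_dims/scale sequence above is what torchvision's to_tensor does in one call; a minimal equivalent sketch (assuming rgb is the RGB array produced by cvtColor):

# to_tensor converts an HWC uint8 array to a CHW float tensor scaled to [0, 1]
tensor = transforms.functional.to_tensor(rgb).unsqueeze(0).to(device)  # unsqueeze adds the batch dimension
detections = model(tensor)[0]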
# %% Cell 6: draw boxes around detected people and count them
people = 0
for i in range(0, len(detections["boxes"])):
    confidence = detections["scores"][i]  # get confidence score of each object in the image
    idx = int(detections["labels"][i])  # identifying the id of each of the classes in the image
    box = detections["boxes"][i].detach().cpu().numpy()  # gets the coordinates for the bounding boxes
    (X_1, Y_1, X_2, Y_2) = box.astype("int")

    if confidence > 0.75 and idx == 1:
        # matching the label index with its class and its probability
        label = f"{classes[idx]}, {idx}: {confidence * 100}%"
        print(f"[INFO] {label}")
        people += 1
        cv2.rectangle(orig, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)  # draw bounding boxes over each object
        y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15

        # adds the label text to the image.
        cv2.putText(orig, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)
        print(f"People: {people}")
        cv2.putText(orig, f"Number of People: {people}", (5, 19), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

cv2.imwrite("./data/detected_img.jpg", orig)
cv2.imshow("Image Detection", orig)
cv2.waitKey(0)

Output:
[INFO] person, 1: 98.54804992675781%
[INFO] person, 1: 98.00418090820312%
[INFO] person, 1: 88.78005981445312%
[INFO] person, 1: 80.33210754394531%
[INFO] person, 1: 78.6150894165039%
Result: 32 (the return value of cv2.waitKey).
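These cells only run inference, so wrapping the forward pass in torch.no_grad() skips building the autograd graph and saves memory (it also makes the later .detach() calls unnecessary); a small sketch:

with torch.no_grad():  # gradients are not needed for inference
    detections = model(image)[0]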
# %% Cell 7: detection over video frames
def video_processing(video_path):
    video = cv2.VideoCapture(video_path)
    frame_width = int(video.get(3))
    frame_height = int(video.get(4))

    # Define the codec and create a VideoWriter object for the annotated output.
    out = cv2.VideoWriter('./data/outpy.mp4', cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height))

    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break
        vid = frame.copy()  # copy only after confirming a frame was read
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = transforms.functional.to_tensor(frame)
        frame = frame.to(device)
        vid_detect = model([frame])[0]

        for i in range(0, len(vid_detect["boxes"])):
            confidence = vid_detect["scores"][i]

            if confidence > 0.75:
                idx = int(vid_detect["labels"][i])
                box = vid_detect["boxes"][i].detach().cpu().numpy()
                (X_1, Y_1, X_2, Y_2) = box.astype("int")

                label = f"{classes[idx]}, {idx}: {confidence * 100}%"
                print(f"[INFO] {label}")

                cv2.rectangle(vid, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)
                y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15

                cv2.putText(vid, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

        out.write(vid)  # write the annotated frame to the output video

        # cv2.imshow("Video Detection", vid)
        # key = cv2.waitKey(40) & 0xFF
        # if key == ord(" "):
        #     break

    out.release()
    cv2.destroyAllWindows()
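One container/codec detail: the MJPG fourcc is conventionally written into .avi files, while .mp4 containers usually take an MPEG-4 codec. A sketch of a matching writer setup:

fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # MPEG-4 codec for .mp4 containers
out = cv2.VideoWriter("./data/outpy.mp4", fourcc, 10, (frame_width, frame_height))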
# %% Cell 8: run the video pipeline on a sample clip
video_processing("./data/a1.mp4")

Output (the first few and the last of several hundred [INFO] lines):
[INFO] person, 1: 99.96479797363281%
[INFO] person, 1: 99.8873291015625%
[INFO] tie, 32: 84.8359375%
[INFO] skateboard, 41: 84.439697265625%
[INFO] person, 1: 99.8567123413086%
...
[INFO] person, 1: 99.27015686035156%
# %% Cell 9: play a video file frame by frame
vid = cv2.VideoCapture("ai.mp4")
ret = True
while ret:
    ret, frame = vid.read()

    if ret:
        cv2.imshow("Video Window", frame)
        cv2.waitKey(40)

# vid.release()
# cv2.destroyAllWindows()
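The fixed 40 ms delay assumes a roughly 25 fps clip; the source's actual frame rate can be read from the capture instead (a sketch):

fps = vid.get(cv2.CAP_PROP_FPS) or 25  # fall back to 25 if the metadata is missing
delay = int(1000 / fps)  # per-frame display delay in milliseconds
cv2.waitKey(delay)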
# %% Cells 10-12 are empty.

Notebook metadata: Colab GPU runtime ("accelerator": "GPU"), Python 3 kernel, plus the stored ipywidgets state for the pretrained-weight download progress bar (160M/160M in about two seconds at 73.2MB/s).
faster_rcnn.py
ADDED
@@ -0,0 +1,108 @@
# importing modules
import cv2
import torch
from torchvision import transforms
from torchvision.models import detection
import numpy as np

# Checks if there is a GPU present; if not, uses the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The class names from the COCO dataset, in label-index order.
classes = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
           'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
           'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
           'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
           'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
           'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
           'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
           'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
           'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
           'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
           'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
           'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']

colors = np.random.uniform(0, 255, size=(len(classes), 3))  # assigning a random color to each class


# Load the pretrained Faster R-CNN ResNet-50 FPN v2 model (the 'weights'
# argument replaces the deprecated 'pretrained'/'pretrained_backbone' flags).
model = detection.fasterrcnn_resnet50_fpn_v2(weights="DEFAULT", progress=True).to(device)
print(model.eval())  # switches to inference mode and prints the architecture of the model


# function to carry out object detection on images.
def img_detect(img_path):
    image = cv2.imread(img_path)  # reads the image using OpenCV
    image = cv2.resize(image, (640, 480))
    orig = image.copy()

    # changing the colorspace from BGR to RGB (the model expects RGB input)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    image = image.transpose((2, 0, 1))  # swap channels-last (HWC) to channels-first (CHW)

    image = np.expand_dims(image, axis=0)  # add a batch dimension to the image
    image = image / 255.0  # scale pixel values from (0, 255) to (0, 1)
    image = torch.FloatTensor(image)  # convert the NumPy array to a tensor

    image = image.to(device)
    detections = model(image)[0]  # the image is passed to the model to get the bounding boxes

    people = 0
    # loop to construct bounding boxes on the image.
    for i in range(0, len(detections["boxes"])):
        confidence = detections["scores"][i]  # get confidence score of each object in the image
        idx = int(detections["labels"][i])  # identifying the id of each of the classes in the image
        box = detections["boxes"][i].detach().cpu().numpy()  # gets the coordinates for the bounding boxes
        (X_1, Y_1, X_2, Y_2) = box.astype("int")

        if confidence > 0.75 and idx == 1:
            # matching the label index with its class and its probability
            label = f"{classes[idx]}, {idx}: {confidence * 100}%"
            print(f"[INFO] {label}")
            people += 1
            cv2.rectangle(orig, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)  # draw bounding boxes over each object
            y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15

            # adds the label text to the image.
            cv2.putText(orig, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)
            cv2.putText(orig, f"Number of People: {people}", (5, 19), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

    # convert back to RGB so downstream viewers (e.g. Gradio) display true colors
    return cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)


# function to perform object detection in videos
def video_detection(video_path, vid_out="./data/vid_detection.mp4"):
    video = cv2.VideoCapture(video_path)
    frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

    out = cv2.VideoWriter(vid_out, cv2.VideoWriter_fourcc(*"mp4v"), 10, (frame_width, frame_height))

    while video.isOpened():
        ret, frame = video.read()
        if not ret:
            break
        vid = frame.copy()  # copy only after confirming a frame was read
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = transforms.functional.to_tensor(frame)
        frame = frame.to(device)
        vid_detect = model([frame])[0]

        for i in range(0, len(vid_detect["boxes"])):
            confidence = vid_detect["scores"][i]

            if confidence > 0.75:
                idx = int(vid_detect["labels"][i])
                box = vid_detect["boxes"][i].detach().cpu().numpy()
                (X_1, Y_1, X_2, Y_2) = box.astype("int")

                label = f"{classes[idx]}, {idx}: {confidence * 100}%"
                print(f"[INFO] {label}")

                cv2.rectangle(vid, (X_1, Y_1), (X_2, Y_2), colors[idx], 2)
                y = Y_1 - 15 if Y_1 - 15 > 15 else Y_1 + 15

                cv2.putText(vid, label, (X_1, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, colors[idx], 2)

        out.write(vid)  # write the annotated frame to the output video

    video.release()
    out.release()
    return vid_out  # path of the annotated video
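A quick local smoke test of these two functions might look like the following (the sample paths are placeholders, not files from this commit):

if __name__ == "__main__":
    annotated = img_detect("./data/sample.jpg")  # hypothetical test image
    cv2.imwrite("./data/detected_img.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))  # imwrite expects BGR
    print(video_detection("./data/sample.mp4"))  # prints the path of the annotated video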
readme.md
ADDED
@@ -0,0 +1,3 @@
# Human Detector and Counter in Image

This Space detects the people in an uploaded image or video with a pretrained Faster R-CNN (ResNet-50 FPN) model and counts how many it finds.