import cv2 as cv 
import numpy as np
from tts import *

# Distance constants (all widths/distances in inches)
KNOWN_DISTANCE = 45   # calibration distance to the reference object
PERSON_WIDTH = 16     # real-world width of a person
MOBILE_WIDTH = 3.0    # real-world width of a mobile phone
CHAIR_WIDTH = 20.0    # real-world width of a chair

text1 = ""
text2 = ""

# Object detector constants
CONFIDENCE_THRESHOLD = 0.4
NMS_THRESHOLD = 0.3

# colors for detected objects
COLORS = [(255, 0, 0), (255, 0, 255), (0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
GREEN = (0, 255, 0)
BLACK = (0, 0, 0)
# defining fonts
FONTS = cv.FONT_HERSHEY_COMPLEX

# getting class names from classes.txt file 
class_names = []
with open("classes.txt", "r") as f:
    class_names = [cname.strip() for cname in f.readlines()]
# setting up the OpenCV DNN net with the YOLOv4-tiny weights and config
yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')

# prefer the CUDA backend/target if OpenCV was built with CUDA support;
# otherwise OpenCV falls back to its default CPU backend
yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)

model = cv.dnn_DetectionModel(yoloNet)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
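
# Note: model.detect(frame, conf_threshold, nms_threshold) returns three parallel
# sequences: class ids, confidence scores, and bounding boxes in (x, y, w, h)
# form (top-left corner plus width and height), which object_detector() below unpacks.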

# object detector function
def object_detector(image):
    classes, scores, boxes = model.detect(image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    # creating an empty list to collect the object data
    data_list = []
    for (classid, score, box) in zip(classes, scores, boxes):
        # box is (x, y, w, h): top-left corner plus width and height
        x, y, w, h = box
        center_x, center_y = x + w / 2, y + h / 2
        height, width, channels = image.shape
        # classify the detection's position within the frame (thirds along each axis)

        if center_x <= width/3:
            W_pos = "left"
        elif center_x <= (width/3 * 2):
            W_pos = "center"
        else:
            W_pos = "right"
        
        if center_y <= height/3:
            H_pos = "top"
        elif center_y <= (height/3 * 2):
            H_pos = "mid"
        else:
            H_pos = "bottom"

        text1 = W_pos
        text2 = H_pos
        # define the color of each object based on its class id
        color = COLORS[int(classid) % len(COLORS)]

        label = "%s : %f" % (class_names[classid], score)

        # draw rectangle and label on the object
        cv.rectangle(image, box, color, 2)
        cv.putText(image, label, (box[0], box[1]-14), FONTS, 0.5, color, 2)

        # debug output: which of the supported classes was detected
        print("objects identified status")
        print("person identified : ", classid == 0)
        print("mobile identified : ", classid == 67)
        print("chair identified : ", classid == 56)

        # collecting the data for supported classes:
        # 1: class name, 2: object width in pixels,
        # 3: position where the distance text will be drawn,
        # 4-5: horizontal and vertical position labels
        if classid == 0:     # person
            data_list.append([class_names[classid], box[2], (box[0], box[1]-2), text1, text2])
        elif classid == 67:  # cell phone
            data_list.append([class_names[classid], box[2], (box[0], box[1]-2), text1, text2])
        elif classid == 56:  # chair
            data_list.append([class_names[classid], box[2], (box[0], box[1]-2), text1, text2])

        # to include more classes, simply add more [elif] statements here
    # returning the list containing the object data
    return data_list

# focal length finder function
def focal_length_finder(measured_distance, real_width, width_in_rf):
    # triangle similarity: focal length = (width in pixels * known distance) / real width
    focal_length = (width_in_rf * measured_distance) / real_width
    return focal_length

# distance finder function
def distance_finder(focal_length, real_object_width, width_in_frame):
    # triangle similarity rearranged: distance = (real width * focal length) / width in pixels
    distance = (real_object_width * focal_length) / width_in_frame
    return distance
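
# Worked example of the triangle-similarity math above (hypothetical pixel
# widths, not values measured from this project's reference images):
#   focal_length_finder(45, 16, 160)  -> (160 * 45) / 16  = 450.0
#   distance_finder(450, 16, 320)     -> (16 * 450) / 320 = 22.5 inches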

# reading the reference images from the ReferenceImages directory
ref_person = cv.imread('ReferenceImages/image14.png')
ref_mobile = cv.imread('ReferenceImages/image4.png')
ref_chair = cv.imread('ReferenceImages/image22.png')

# measure the pixel width of each reference object
# (the list index selects which detection in the reference image is used)
mobile_data = object_detector(ref_mobile)
mobile_width_in_rf = mobile_data[1][1]

person_data = object_detector(ref_person)
person_width_in_rf = person_data[0][1]

chair_data = object_detector(ref_chair)
chair_width_in_rf = chair_data[0][1]

# print(f"Person width in pixels : {person_width_in_rf} mobile width in pixel: {mobile_width_in_rf}")

# finding focal length 
focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
focal_chair = focal_length_finder(KNOWN_DISTANCE, CHAIR_WIDTH, chair_width_in_rf)


def get_frame_output(frame, frame_cnt):
    # guidance text for the current frame is written to this file (overwritten each frame)
    output_text_file = open('output_text.txt', 'w')
    data = object_detector(frame)
    for d in data:
        if d[0] == 'person':
            distance = distance_finder(focal_person, PERSON_WIDTH, d[1])
            x, y = d[2]
        elif d[0] == 'cell phone':
            distance = distance_finder(focal_mobile, MOBILE_WIDTH, d[1])
            x, y = d[2]
        elif d[0] == 'chair':
            distance = distance_finder(focal_chair, CHAIR_WIDTH, d[1])
            x, y = d[2]
        
        text1, text2 = d[3], d[4]

        # draw a filled background box and the estimated distance on the frame
        cv.rectangle(frame, (x, y-3), (x+150, y+23), BLACK, -1)
        cv.putText(frame, f'Dis: {round(distance, 2)} inch', (x+5, y+13), FONTS, 0.48, GREEN, 2)
        
        # build the guidance text for this detection
        if distance > 100:
            output_text = "Get closer"
        elif (round(distance) > 50) and (text2 == "mid"):
            output_text = "Go straight"
        else:
            output_text = str(d[0]) + " " + str(int(round(distance, 1))) + " inches" + " take left or right"

        output_text_file.write(output_text)

        output_text_file.write("\n")
    
    output_text_file.close()
    
    return frame
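
# Minimal capture-loop sketch (illustrative assumption, not part of the original
# script): shows how get_frame_output() could be driven from a webcam. It assumes
# camera index 0 and does not call the tts module, whose API is not shown in this file.
if __name__ == '__main__':
    cap = cv.VideoCapture(0)  # default webcam (assumption)
    frame_cnt = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = get_frame_output(frame, frame_cnt)
        frame_cnt += 1
        cv.imshow('frame', frame)
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv.destroyAllWindows()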