rahulramar committed on
Commit
eccb33b
1 Parent(s): 656745e

initial commit

Files changed (6)
  1. CaptureReferenceImage.py +69 -0
  2. DistanceEstimation.py +166 -0
  3. Procfile.txt +1 -0
  4. demo_Recoder.py +109 -0
  5. setup.sh.txt +13 -0
  6. tts.py +36 -0
CaptureReferenceImage.py ADDED
@@ -0,0 +1,69 @@
+ import cv2 as cv
+ import time
+
+ # detection parameters
+ CONFIDENCE_THRESHOLD = 0.5
+ NMS_THRESHOLD = 0.5
+
+ # colors for detected objects
+ COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
+ GREEN = (0, 255, 0)
+ RED = (0, 0, 255)
+ PINK = (147, 20, 255)
+ ORANGE = (0, 69, 255)
+ fonts = cv.FONT_HERSHEY_COMPLEX
+
+ # reading class names from the text file
+ class_names = []
+ with open("classes.txt", "r") as f:
+     class_names = [cname.strip() for cname in f.readlines()]
+
+ # setting up the OpenCV DNN
+ yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
+
+ yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
+ yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
+
+ model = cv.dnn_DetectionModel(yoloNet)
+ model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
+
+
+ def ObjectDetector(image):
+     # detect objects and draw a labelled box around each one
+     classes, scores, boxes = model.detect(
+         image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
+
+     for (classid, score, box) in zip(classes, scores, boxes):
+         color = COLORS[int(classid) % len(COLORS)]
+         label = "%s : %f" % (class_names[int(classid)], score)
+         cv.rectangle(image, box, color, 2)
+         cv.putText(image, label, (box[0], box[1] - 10), fonts, 0.5, color, 2)
+
+
+ # setting up the camera
+ camera = cv.VideoCapture(0)
+ counter = 0
+ capture = False
+ number = 0
+ while True:
+     ret, frame = camera.read()
+
+     original = frame.copy()
+     ObjectDetector(frame)
+     cv.imshow('original', original)
+
+     if capture and counter < 10:
+         counter += 1
+         cv.putText(
+             frame, f"Capturing Img No: {number}", (30, 30), fonts, 0.6, PINK, 2)
+     else:
+         counter = 0
+
+     cv.imshow('frame', frame)
+     key = cv.waitKey(1)
+
+     if key == ord('c'):
+         # save the clean (unannotated) frame as a reference image
+         capture = True
+         number += 1
+         cv.imwrite(f'ReferenceImages/image{number}.png', original)
+     if key == ord('q'):
+         break
+ camera.release()
+ cv.destroyAllWindows()
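All three scripts request the CUDA backend and FP16 target for the yolov4-tiny net; on an OpenCV build compiled without CUDA support those calls may be ignored or raise an error at inference time. A minimal sketch of a CPU-only variant, assuming the same yolov4-tiny.weights and yolov4-tiny.cfg files are present:

    import cv2 as cv

    yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')

    # CPU fallback instead of DNN_BACKEND_CUDA / DNN_TARGET_CUDA_FP16
    yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

    model = cv.dnn_DetectionModel(yoloNet)
    model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)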
DistanceEstimation.py ADDED
@@ -0,0 +1,166 @@
+ import cv2 as cv
+ import numpy as np
+ from tts import *
+
+ # Distance constants
+ KNOWN_DISTANCE = 45   # inches
+ PERSON_WIDTH = 16     # inches
+ MOBILE_WIDTH = 3.0    # inches
+ CHAIR_WIDTH = 20.0    # inches
+
+ text1 = ""
+ text2 = ""
+
+ # Object detector constants
+ CONFIDENCE_THRESHOLD = 0.4
+ NMS_THRESHOLD = 0.3
+
+ # colors for detected objects
+ COLORS = [(255, 0, 0), (255, 0, 255), (0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
+ GREEN = (0, 255, 0)
+ BLACK = (0, 0, 0)
+ # defining fonts
+ FONTS = cv.FONT_HERSHEY_COMPLEX
+
+ # getting class names from the classes.txt file
+ class_names = []
+ with open("classes.txt", "r") as f:
+     class_names = [cname.strip() for cname in f.readlines()]
+
+ # setting up the OpenCV DNN
+ yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
+
+ yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
+ yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
+
+ model = cv.dnn_DetectionModel(yoloNet)
+ model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
+
+
+ # object detector function
+ def object_detector(image):
+     classes, scores, boxes = model.detect(image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
+     # list collecting the data of every detected object
+     data_list = []
+     for (classid, score, box) in zip(classes, scores, boxes):
+         # box is (x, y, w, h), so the centre is the top-left corner plus half the size
+         x, y, w, h = box
+         center_x, center_y = x + w / 2, y + h / 2
+         height, width, channels = image.shape
+
+         # horizontal position of the object in the frame
+         if center_x <= width / 3:
+             W_pos = "left"
+         elif center_x <= (width / 3 * 2):
+             W_pos = "center"
+         else:
+             W_pos = "right"
+
+         # vertical position of the object in the frame
+         if center_y <= height / 3:
+             H_pos = "top"
+         elif center_y <= (height / 3 * 2):
+             H_pos = "mid"
+         else:
+             H_pos = "bottom"
+
+         text1 = W_pos
+         text2 = H_pos
+
+         # color of each object, based on its class id
+         color = COLORS[int(classid) % len(COLORS)]
+         label = "%s : %f" % (class_names[int(classid)], score)
+
+         # draw rectangle and label on the object
+         cv.rectangle(image, box, color, 2)
+         cv.putText(image, label, (box[0], box[1] - 14), FONTS, 0.5, color, 2)
+
+         print("objects identified status")
+         print("person identified : ", classid == 0)
+         print("mobile identified : ", classid == 67)
+         print("chair identified : ", classid == 56)
+
+         # collecting the data:
+         # 1: class name, 2: object width in pixels, 3: position where the distance text is drawn,
+         # 4: horizontal position, 5: vertical position
+         if classid == 0:     # person class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2), text1, text2])
+         elif classid == 67:  # cell phone class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2), text1, text2])
+         elif classid == 56:  # chair class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2), text1, text2])
+         # to include more classes, simply add more [elif] statements here
+
+     # returning the list containing the object data
+     return data_list
+
+
+ def focal_length_finder(measured_distance, real_width, width_in_rf):
+     focal_length = (width_in_rf * measured_distance) / real_width
+     return focal_length
+
+
+ # distance finder function
+ def distance_finder(focal_length, real_object_width, width_in_frame):
+     distance = (real_object_width * focal_length) / width_in_frame
+     return distance
+
+
+ # reading the reference images from the directory
+ ref_person = cv.imread('ReferenceImages/image14.png')
+ ref_mobile = cv.imread('ReferenceImages/image4.png')
+ ref_chair = cv.imread('ReferenceImages/image22.png')
+
+ mobile_data = object_detector(ref_mobile)
+ mobile_width_in_rf = mobile_data[1][1]
+
+ person_data = object_detector(ref_person)
+ person_width_in_rf = person_data[0][1]
+
+ chair_data = object_detector(ref_chair)
+ chair_width_in_rf = chair_data[0][1]
+
+ # print(f"Person width in pixels : {person_width_in_rf} mobile width in pixel: {mobile_width_in_rf}")
+
+ # finding the focal lengths
+ focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
+ focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
+ focal_chair = focal_length_finder(KNOWN_DISTANCE, CHAIR_WIDTH, chair_width_in_rf)
+
+
+ def get_frame_output(frame, frame_cnt):
+     # detect objects, draw their distances on the frame and write the guidance text to a file
+     output_text_file = open('output_text.txt', 'w')
+     data = object_detector(frame)
+     for d in data:
+         if d[0] == 'person':
+             distance = distance_finder(focal_person, PERSON_WIDTH, d[1])
+             x, y = d[2]
+         elif d[0] == 'cell phone':
+             distance = distance_finder(focal_mobile, MOBILE_WIDTH, d[1])
+             x, y = d[2]
+         elif d[0] == 'chair':
+             distance = distance_finder(focal_chair, CHAIR_WIDTH, d[1])
+             x, y = d[2]
+
+         text1, text2 = d[3], d[4]
+
+         cv.rectangle(frame, (x, y - 3), (x + 150, y + 23), BLACK, -1)
+         cv.putText(frame, f'Dis: {round(distance, 2)} inch', (x + 5, y + 13), FONTS, 0.48, GREEN, 2)
+
+         output_text = ""
+         if distance > 100:
+             output_text = "Get closer"
+         elif (round(distance) > 50) and (text2 == "mid"):
+             output_text = "Go straight"
+         else:
+             output_text = (str(d[0]) + " " + str(int(round(distance, 1))) + " inches" + " take left or right")
+
+         output_text_file.write(output_text)
+         output_text_file.write("\n")
+
+     output_text_file.close()
+     return frame
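The distance estimate rests on similar triangles: calibration measures how many pixels a known-width object occupies at a known distance, which fixes the focal length in pixel units, and the live step inverts that relation. A small numeric sketch with assumed values (say the reference person appears 120 px wide at the 45-inch calibration distance, then 60 px wide in a live frame):

    KNOWN_DISTANCE = 45   # inches, calibration distance
    PERSON_WIDTH = 16     # inches, real-world shoulder width used by the scripts

    # calibration: focal length in pixel units
    person_width_in_rf = 120  # assumed pixel width in the reference image
    focal_person = (person_width_in_rf * KNOWN_DISTANCE) / PERSON_WIDTH  # 337.5

    # live frame: the person now appears half as wide, so is twice as far away
    width_in_frame = 60
    distance = (PERSON_WIDTH * focal_person) / width_in_frame  # 90.0 inches
    print(distance)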
Procfile.txt ADDED
@@ -0,0 +1 @@
+ web: sh setup.sh && streamlit run app.py
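The Procfile starts `streamlit run app.py`, but app.py itself is not part of this commit. A hypothetical minimal sketch of how it might wire the pieces together; the page layout, widget names and the per-second call to get_audio are assumptions, not the committed app:

    import cv2 as cv
    import streamlit as st
    from DistanceEstimation import get_frame_output
    from tts import get_audio

    st.title("Object Distance Estimation")   # assumed title
    run = st.checkbox("Run camera")
    placeholder = st.empty()

    cap = cv.VideoCapture(0)
    frame_cnt = 0
    while run:
        ret, frame = cap.read()
        if not ret:
            break
        frame = get_frame_output(frame, frame_cnt)  # draws boxes, writes output_text.txt
        if frame_cnt % 30 == 0:
            get_audio()                             # synthesize the guidance text to audio.mp3
        placeholder.image(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
        frame_cnt += 1
    cap.release()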
demo_Recoder.py ADDED
@@ -0,0 +1,109 @@
+ import cv2 as cv
+ import numpy as np
+
+ # Distance constants
+ KNOWN_DISTANCE = 45   # inches
+ PERSON_WIDTH = 16     # inches
+ MOBILE_WIDTH = 3.0    # inches
+
+ # Object detector constants
+ CONFIDENCE_THRESHOLD = 0.4
+ NMS_THRESHOLD = 0.3
+
+ # colors for detected objects
+ COLORS = [(255, 0, 0), (255, 0, 255), (0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
+ GREEN = (0, 255, 0)
+ BLACK = (0, 0, 0)
+ # defining fonts
+ FONTS = cv.FONT_HERSHEY_COMPLEX
+
+ # getting class names from the classes.txt file
+ class_names = []
+ with open("classes.txt", "r") as f:
+     class_names = [cname.strip() for cname in f.readlines()]
+
+ # setting up the OpenCV DNN
+ yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
+
+ yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
+ yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
+
+ model = cv.dnn_DetectionModel(yoloNet)
+ model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
+
+
+ # object detector function
+ def object_detector(image):
+     classes, scores, boxes = model.detect(image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
+     # list collecting the data of every detected object
+     data_list = []
+     for (classid, score, box) in zip(classes, scores, boxes):
+         # color of each object, based on its class id
+         color = COLORS[int(classid) % len(COLORS)]
+         label = "%s : %f" % (class_names[int(classid)], score)
+
+         # draw rectangle and label on the object
+         cv.rectangle(image, box, color, 2)
+         cv.putText(image, label, (box[0], box[1] - 14), FONTS, 0.5, color, 2)
+
+         # collecting the data:
+         # 1: class name, 2: object width in pixels, 3: position where the distance text is drawn
+         if classid == 0:     # person class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2)])
+         elif classid == 67:  # cell phone class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2)])
+     # return the list
+     return data_list
+
+
+ def focal_length_finder(measured_distance, real_width, width_in_rf):
+     focal_length = (width_in_rf * measured_distance) / real_width
+     return focal_length
+
+
+ # distance finder function
+ def distance_finder(focal_length, real_object_width, width_in_frame):
+     distance = (real_object_width * focal_length) / width_in_frame
+     return distance
+
+
+ # reading the reference images from the directory
+ ref_person = cv.imread('ReferenceImages/image14.png')
+ ref_mobile = cv.imread('ReferenceImages/image4.png')
+
+ mobile_data = object_detector(ref_mobile)
+ mobile_width_in_rf = mobile_data[1][1]
+
+ person_data = object_detector(ref_person)
+ person_width_in_rf = person_data[0][1]
+
+ print(f"Person width in pixels : {person_width_in_rf} mobile width in pixel: {mobile_width_in_rf}")
+
+ # finding the focal lengths
+ focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
+ focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
+
+ # camera index 3 is machine specific; change it to match the connected camera
+ cap = cv.VideoCapture(3)
+ fourcc = cv.VideoWriter_fourcc(*'XVID')
+ Recoder = cv.VideoWriter('out.mp4', fourcc, 8.0,
+                          (int(cap.get(cv.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))
+ while True:
+     ret, frame = cap.read()
+
+     data = object_detector(frame)
+     for d in data:
+         if d[0] == 'person':
+             distance = distance_finder(focal_person, PERSON_WIDTH, d[1])
+             x, y = d[2]
+         elif d[0] == 'cell phone':
+             distance = distance_finder(focal_mobile, MOBILE_WIDTH, d[1])
+             x, y = d[2]
+         cv.rectangle(frame, (x, y - 3), (x + 150, y + 23), BLACK, -1)
+         cv.putText(frame, f'Dis: {round(distance, 2)} inch', (x + 5, y + 13), FONTS, 0.48, GREEN, 2)
+
+     cv.imshow('frame', frame)
+     Recoder.write(frame)
+
+     key = cv.waitKey(1)
+     if key == ord('q'):
+         break
+ cv.destroyAllWindows()
+ Recoder.release()
+ cap.release()
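`cv.VideoCapture(3)` picks a machine-specific device index; on most machines the built-in webcam is index 0. A small sketch, assuming a standard OpenCV build, to probe which indices actually deliver frames:

    import cv2 as cv

    # try the first few device indices and report which ones open and return a frame
    for idx in range(5):
        cap = cv.VideoCapture(idx)
        ok, _ = cap.read()
        cap.release()
        print(f"camera index {idx}: {'works' if ok else 'unavailable'}")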
setup.sh.txt ADDED
@@ -0,0 +1,13 @@
+ mkdir -p ~/.streamlit/
+
+ echo "\
+ [general]\n\
+ email = \"your-email@domain.com\"\n\
+ " > ~/.streamlit/credentials.toml
+
+ echo "\
+ [server]\n\
+ headless = true\n\
+ enableCORS=false\n\
+ port = $PORT\n\
+ " > ~/.streamlit/config.toml
tts.py ADDED
@@ -0,0 +1,36 @@
+ from ibm_watson import TextToSpeechV1
+ from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
+ from typing import List
+ import av
+ import time
+
+ # IBM Watson Text to Speech credentials (hardcoded in this file)
+ apikey = 'jkN9qoC2wChdRZSQH_Sc0i6bznFuFMuXiAjvIWcI5yZ3'
+ url = 'https://api.eu-gb.text-to-speech.watson.cloud.ibm.com/instances/d2bde643-048b-4240-a609-21ddf49082df'
+
+ # Setup Service
+ authenticator = IAMAuthenticator(apikey)
+ tts = TextToSpeechV1(authenticator=authenticator)
+ tts.set_service_url(url)
+
+
+ def get_audio():
+     # read the guidance text written by DistanceEstimation.py and synthesize it to audio.mp3
+     with open('output_text.txt', 'r') as output_text_file:
+         text = ""
+         for line in output_text_file.readlines():
+             text += str(line)
+
+     frames: List[int] = []
+     if len(text):
+         print(">>>>>>>>>>>>>>>>>>>>>>>>", len(text))
+         file = 'audio.mp3'
+         with open(file, 'wb') as audio_file:
+             res = tts.synthesize(text, accept='audio/mp3', voice='en-GB_JamesV3Voice').get_result()
+             audio_file.write(res.content)
+         # container = av.open(file)
+         # for frame in container.decode(audio=0):
+         #     print("Appending Frames")
+         #     frames.append(frame)
+         # print(">>>>>> Frame Length: ", len(frames))
+         # return frames
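tts.py hardcodes the IBM Cloud API key and service URL in the source. An alternative sketch that loads them from environment variables instead; the variable names IBM_TTS_APIKEY and IBM_TTS_URL are made up for illustration:

    import os
    from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
    from ibm_watson import TextToSpeechV1

    # hypothetical environment variable names; export them before running
    apikey = os.environ["IBM_TTS_APIKEY"]
    url = os.environ["IBM_TTS_URL"]

    authenticator = IAMAuthenticator(apikey)
    tts = TextToSpeechV1(authenticator=authenticator)
    tts.set_service_url(url)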