rahulramar committed on
Commit
eccb33b
1 Parent(s): 656745e

initial commit

Files changed (6)
  1. CaptureReferenceImage.py +69 -0
  2. DistanceEstimation.py +166 -0
  3. Procfile.txt +1 -0
  4. demo_Recoder.py +109 -0
  5. setup.sh.txt +13 -0
  6. tts.py +36 -0
CaptureReferenceImage.py ADDED
@@ -0,0 +1,69 @@
+ import cv2 as cv
+ import time
+
+ # detection parameters
+ CONFIDENCE_THRESHOLD = 0.5
+ NMS_THRESHOLD = 0.5
+
+ # colors for detected objects
+ COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
+ GREEN = (0, 255, 0)
+ RED = (0, 0, 255)
+ PINK = (147, 20, 255)
+ ORANGE = (0, 69, 255)
+ fonts = cv.FONT_HERSHEY_COMPLEX
+
+ # reading class names from the text file
+ class_names = []
+ with open("classes.txt", "r") as f:
+     class_names = [cname.strip() for cname in f.readlines()]
+
+ # setting up the OpenCV DNN
+ yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
+
+ yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
+ yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
+
+ model = cv.dnn_DetectionModel(yoloNet)
+ model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
+
+
+ def ObjectDetector(image):
+     # detect objects and draw a labelled box around each one
+     classes, scores, boxes = model.detect(
+         image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
+
+     for (classid, score, box) in zip(classes, scores, boxes):
+         color = COLORS[int(classid) % len(COLORS)]
+         label = "%s : %f" % (class_names[int(classid)], score)
+         cv.rectangle(image, box, color, 2)
+         cv.putText(image, label, (box[0], box[1] - 10), fonts, 0.5, color, 2)
+
+
+ # setting up the camera
+ camera = cv.VideoCapture(0)
+ counter = 0
+ capture = False
+ number = 0
+ while True:
+     ret, frame = camera.read()
+
+     original = frame.copy()
+     ObjectDetector(frame)
+     cv.imshow('original', original)
+
+     if capture and counter < 10:
+         counter += 1
+         cv.putText(
+             frame, f"Capturing Img No: {number}", (30, 30), fonts, 0.6, PINK, 2)
+     else:
+         counter = 0
+
+     cv.imshow('frame', frame)
+     key = cv.waitKey(1)
+
+     if key == ord('c'):
+         # save the clean (unannotated) frame as a reference image
+         capture = True
+         number += 1
+         cv.imwrite(f'ReferenceImages/image{number}.png', original)
+     if key == ord('q'):
+         break
+ camera.release()
+ cv.destroyAllWindows()
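All three scripts request the CUDA backend and FP16 target for the yolov4-tiny net; on an OpenCV build compiled without CUDA support those calls may be ignored or raise an error at inference time. A minimal sketch of a CPU-only variant, assuming the same yolov4-tiny.weights and yolov4-tiny.cfg files are present:

    import cv2 as cv

    yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')

    # CPU fallback instead of DNN_BACKEND_CUDA / DNN_TARGET_CUDA_FP16
    yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
    yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

    model = cv.dnn_DetectionModel(yoloNet)
    model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)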
DistanceEstimation.py ADDED
@@ -0,0 +1,166 @@
+ import cv2 as cv
+ import numpy as np
+ from tts import *
+
+ # Distance constants
+ KNOWN_DISTANCE = 45   # inches
+ PERSON_WIDTH = 16     # inches
+ MOBILE_WIDTH = 3.0    # inches
+ CHAIR_WIDTH = 20.0    # inches
+
+ text1 = ""
+ text2 = ""
+
+ # Object detector constants
+ CONFIDENCE_THRESHOLD = 0.4
+ NMS_THRESHOLD = 0.3
+
+ # colors for detected objects
+ COLORS = [(255, 0, 0), (255, 0, 255), (0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
+ GREEN = (0, 255, 0)
+ BLACK = (0, 0, 0)
+ # defining fonts
+ FONTS = cv.FONT_HERSHEY_COMPLEX
+
+ # getting class names from the classes.txt file
+ class_names = []
+ with open("classes.txt", "r") as f:
+     class_names = [cname.strip() for cname in f.readlines()]
+
+ # setting up the OpenCV DNN
+ yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
+
+ yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
+ yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
+
+ model = cv.dnn_DetectionModel(yoloNet)
+ model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
+
+
+ # object detector function
+ def object_detector(image):
+     classes, scores, boxes = model.detect(image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
+     # list collecting the data of every detected object
+     data_list = []
+     for (classid, score, box) in zip(classes, scores, boxes):
+         # box is (x, y, w, h), so the centre is the top-left corner plus half the size
+         x, y, w, h = box
+         center_x, center_y = x + w / 2, y + h / 2
+         height, width, channels = image.shape
+
+         # horizontal position of the object in the frame
+         if center_x <= width / 3:
+             W_pos = "left"
+         elif center_x <= (width / 3 * 2):
+             W_pos = "center"
+         else:
+             W_pos = "right"
+
+         # vertical position of the object in the frame
+         if center_y <= height / 3:
+             H_pos = "top"
+         elif center_y <= (height / 3 * 2):
+             H_pos = "mid"
+         else:
+             H_pos = "bottom"
+
+         text1 = W_pos
+         text2 = H_pos
+
+         # color of each object, based on its class id
+         color = COLORS[int(classid) % len(COLORS)]
+         label = "%s : %f" % (class_names[int(classid)], score)
+
+         # draw rectangle and label on the object
+         cv.rectangle(image, box, color, 2)
+         cv.putText(image, label, (box[0], box[1] - 14), FONTS, 0.5, color, 2)
+
+         print("objects identified status")
+         print("person identified : ", classid == 0)
+         print("mobile identified : ", classid == 67)
+         print("chair identified : ", classid == 56)
+
+         # collecting the data:
+         # 1: class name, 2: object width in pixels, 3: position where the distance text is drawn,
+         # 4: horizontal position, 5: vertical position
+         if classid == 0:     # person class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2), text1, text2])
+         elif classid == 67:  # cell phone class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2), text1, text2])
+         elif classid == 56:  # chair class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2), text1, text2])
+         # to include more classes, simply add more [elif] statements here
+
+     # returning the list containing the object data
+     return data_list
+
+
+ def focal_length_finder(measured_distance, real_width, width_in_rf):
+     focal_length = (width_in_rf * measured_distance) / real_width
+     return focal_length
+
+
+ # distance finder function
+ def distance_finder(focal_length, real_object_width, width_in_frame):
+     distance = (real_object_width * focal_length) / width_in_frame
+     return distance
+
+
+ # reading the reference images from the directory
+ ref_person = cv.imread('ReferenceImages/image14.png')
+ ref_mobile = cv.imread('ReferenceImages/image4.png')
+ ref_chair = cv.imread('ReferenceImages/image22.png')
+
+ mobile_data = object_detector(ref_mobile)
+ mobile_width_in_rf = mobile_data[1][1]
+
+ person_data = object_detector(ref_person)
+ person_width_in_rf = person_data[0][1]
+
+ chair_data = object_detector(ref_chair)
+ chair_width_in_rf = chair_data[0][1]
+
+ # print(f"Person width in pixels : {person_width_in_rf} mobile width in pixel: {mobile_width_in_rf}")
+
+ # finding the focal lengths
+ focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
+ focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
+ focal_chair = focal_length_finder(KNOWN_DISTANCE, CHAIR_WIDTH, chair_width_in_rf)
+
+
+ def get_frame_output(frame, frame_cnt):
+     # detect objects, draw their distances on the frame and write the guidance text to a file
+     output_text_file = open('output_text.txt', 'w')
+     data = object_detector(frame)
+     for d in data:
+         if d[0] == 'person':
+             distance = distance_finder(focal_person, PERSON_WIDTH, d[1])
+             x, y = d[2]
+         elif d[0] == 'cell phone':
+             distance = distance_finder(focal_mobile, MOBILE_WIDTH, d[1])
+             x, y = d[2]
+         elif d[0] == 'chair':
+             distance = distance_finder(focal_chair, CHAIR_WIDTH, d[1])
+             x, y = d[2]
+
+         text1, text2 = d[3], d[4]
+
+         cv.rectangle(frame, (x, y - 3), (x + 150, y + 23), BLACK, -1)
+         cv.putText(frame, f'Dis: {round(distance, 2)} inch', (x + 5, y + 13), FONTS, 0.48, GREEN, 2)
+
+         output_text = ""
+         if distance > 100:
+             output_text = "Get closer"
+         elif (round(distance) > 50) and (text2 == "mid"):
+             output_text = "Go straight"
+         else:
+             output_text = (str(d[0]) + " " + str(int(round(distance, 1))) + " inches" + " take left or right")
+
+         output_text_file.write(output_text)
+         output_text_file.write("\n")
+
+     output_text_file.close()
+     return frame
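The distance estimate rests on similar triangles: calibration measures how many pixels a known-width object occupies at a known distance, which fixes the focal length in pixel units, and the live step inverts that relation. A small numeric sketch with assumed values (say the reference person appears 120 px wide at the 45-inch calibration distance, then 60 px wide in a live frame):

    KNOWN_DISTANCE = 45   # inches, calibration distance
    PERSON_WIDTH = 16     # inches, real-world shoulder width used by the scripts

    # calibration: focal length in pixel units
    person_width_in_rf = 120  # assumed pixel width in the reference image
    focal_person = (person_width_in_rf * KNOWN_DISTANCE) / PERSON_WIDTH  # 337.5

    # live frame: the person now appears half as wide, so is twice as far away
    width_in_frame = 60
    distance = (PERSON_WIDTH * focal_person) / width_in_frame  # 90.0 inches
    print(distance)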
Procfile.txt ADDED
@@ -0,0 +1 @@
+ web: sh setup.sh && streamlit run app.py
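The Procfile starts `streamlit run app.py`, but app.py itself is not part of this commit. A hypothetical minimal sketch of how it might wire the pieces together; the page layout, widget names and the per-second call to get_audio are assumptions, not the committed app:

    import cv2 as cv
    import streamlit as st
    from DistanceEstimation import get_frame_output
    from tts import get_audio

    st.title("Object Distance Estimation")   # assumed title
    run = st.checkbox("Run camera")
    placeholder = st.empty()

    cap = cv.VideoCapture(0)
    frame_cnt = 0
    while run:
        ret, frame = cap.read()
        if not ret:
            break
        frame = get_frame_output(frame, frame_cnt)  # draws boxes, writes output_text.txt
        if frame_cnt % 30 == 0:
            get_audio()                             # synthesize the guidance text to audio.mp3
        placeholder.image(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
        frame_cnt += 1
    cap.release()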
demo_Recoder.py ADDED
@@ -0,0 +1,109 @@
+ import cv2 as cv
+ import numpy as np
+
+ # Distance constants
+ KNOWN_DISTANCE = 45   # inches
+ PERSON_WIDTH = 16     # inches
+ MOBILE_WIDTH = 3.0    # inches
+
+ # Object detector constants
+ CONFIDENCE_THRESHOLD = 0.4
+ NMS_THRESHOLD = 0.3
+
+ # colors for detected objects
+ COLORS = [(255, 0, 0), (255, 0, 255), (0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
+ GREEN = (0, 255, 0)
+ BLACK = (0, 0, 0)
+ # defining fonts
+ FONTS = cv.FONT_HERSHEY_COMPLEX
+
+ # getting class names from the classes.txt file
+ class_names = []
+ with open("classes.txt", "r") as f:
+     class_names = [cname.strip() for cname in f.readlines()]
+
+ # setting up the OpenCV DNN
+ yoloNet = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
+
+ yoloNet.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
+ yoloNet.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
+
+ model = cv.dnn_DetectionModel(yoloNet)
+ model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
+
+
+ # object detector function
+ def object_detector(image):
+     classes, scores, boxes = model.detect(image, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
+     # list collecting the data of every detected object
+     data_list = []
+     for (classid, score, box) in zip(classes, scores, boxes):
+         # color of each object, based on its class id
+         color = COLORS[int(classid) % len(COLORS)]
+         label = "%s : %f" % (class_names[int(classid)], score)
+
+         # draw rectangle and label on the object
+         cv.rectangle(image, box, color, 2)
+         cv.putText(image, label, (box[0], box[1] - 14), FONTS, 0.5, color, 2)
+
+         # collecting the data:
+         # 1: class name, 2: object width in pixels, 3: position where the distance text is drawn
+         if classid == 0:     # person class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2)])
+         elif classid == 67:  # cell phone class id
+             data_list.append([class_names[int(classid)], box[2], (box[0], box[1] - 2)])
+     # return the list
+     return data_list
+
+
+ def focal_length_finder(measured_distance, real_width, width_in_rf):
+     focal_length = (width_in_rf * measured_distance) / real_width
+     return focal_length
+
+
+ # distance finder function
+ def distance_finder(focal_length, real_object_width, width_in_frame):
+     distance = (real_object_width * focal_length) / width_in_frame
+     return distance
+
+
+ # reading the reference images from the directory
+ ref_person = cv.imread('ReferenceImages/image14.png')
+ ref_mobile = cv.imread('ReferenceImages/image4.png')
+
+ mobile_data = object_detector(ref_mobile)
+ mobile_width_in_rf = mobile_data[1][1]
+
+ person_data = object_detector(ref_person)
+ person_width_in_rf = person_data[0][1]
+
+ print(f"Person width in pixels : {person_width_in_rf} mobile width in pixel: {mobile_width_in_rf}")
+
+ # finding the focal lengths
+ focal_person = focal_length_finder(KNOWN_DISTANCE, PERSON_WIDTH, person_width_in_rf)
+ focal_mobile = focal_length_finder(KNOWN_DISTANCE, MOBILE_WIDTH, mobile_width_in_rf)
+
+ # camera index 3 is machine specific; change it to match the connected camera
+ cap = cv.VideoCapture(3)
+ fourcc = cv.VideoWriter_fourcc(*'XVID')
+ Recoder = cv.VideoWriter('out.mp4', fourcc, 8.0,
+                          (int(cap.get(cv.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))))
+ while True:
+     ret, frame = cap.read()
+
+     data = object_detector(frame)
+     for d in data:
+         if d[0] == 'person':
+             distance = distance_finder(focal_person, PERSON_WIDTH, d[1])
+             x, y = d[2]
+         elif d[0] == 'cell phone':
+             distance = distance_finder(focal_mobile, MOBILE_WIDTH, d[1])
+             x, y = d[2]
+         cv.rectangle(frame, (x, y - 3), (x + 150, y + 23), BLACK, -1)
+         cv.putText(frame, f'Dis: {round(distance, 2)} inch', (x + 5, y + 13), FONTS, 0.48, GREEN, 2)
+
+     cv.imshow('frame', frame)
+     Recoder.write(frame)
+
+     key = cv.waitKey(1)
+     if key == ord('q'):
+         break
+ cv.destroyAllWindows()
+ Recoder.release()
+ cap.release()
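`cv.VideoCapture(3)` picks a machine-specific device index; on most machines the built-in webcam is index 0. A small sketch, assuming a standard OpenCV build, to probe which indices actually deliver frames:

    import cv2 as cv

    # try the first few device indices and report which ones open and return a frame
    for idx in range(5):
        cap = cv.VideoCapture(idx)
        ok, _ = cap.read()
        cap.release()
        print(f"camera index {idx}: {'works' if ok else 'unavailable'}")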
setup.sh.txt ADDED
@@ -0,0 +1,13 @@
+ mkdir -p ~/.streamlit/
+
+ echo "\
+ [general]\n\
+ email = \"your-email@domain.com\"\n\
+ " > ~/.streamlit/credentials.toml
+
+ echo "\
+ [server]\n\
+ headless = true\n\
+ enableCORS=false\n\
+ port = $PORT\n\
+ " > ~/.streamlit/config.toml
tts.py ADDED
@@ -0,0 +1,36 @@
+ from ibm_watson import TextToSpeechV1
+ from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
+ from typing import List
+ import av
+ import time
+
+ # IBM Watson Text to Speech credentials (hardcoded in this file)
+ apikey = 'jkN9qoC2wChdRZSQH_Sc0i6bznFuFMuXiAjvIWcI5yZ3'
+ url = 'https://api.eu-gb.text-to-speech.watson.cloud.ibm.com/instances/d2bde643-048b-4240-a609-21ddf49082df'
+
+ # Setup Service
+ authenticator = IAMAuthenticator(apikey)
+ tts = TextToSpeechV1(authenticator=authenticator)
+ tts.set_service_url(url)
+
+
+ def get_audio():
+     # read the guidance text written by DistanceEstimation.py and synthesize it to audio.mp3
+     with open('output_text.txt', 'r') as output_text_file:
+         text = ""
+         for line in output_text_file.readlines():
+             text += str(line)
+
+     frames: List[int] = []
+     if len(text):
+         print(">>>>>>>>>>>>>>>>>>>>>>>>", len(text))
+         file = 'audio.mp3'
+         with open(file, 'wb') as audio_file:
+             res = tts.synthesize(text, accept='audio/mp3', voice='en-GB_JamesV3Voice').get_result()
+             audio_file.write(res.content)
+         # container = av.open(file)
+         # for frame in container.decode(audio=0):
+         #     print("Appending Frames")
+         #     frames.append(frame)
+         # print(">>>>>> Frame Length: ", len(frames))
+         # return frames
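tts.py hardcodes the IBM Cloud API key and service URL in the source. An alternative sketch that loads them from environment variables instead; the variable names IBM_TTS_APIKEY and IBM_TTS_URL are made up for illustration:

    import os
    from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
    from ibm_watson import TextToSpeechV1

    # hypothetical environment variable names; export them before running
    apikey = os.environ["IBM_TTS_APIKEY"]
    url = os.environ["IBM_TTS_URL"]

    authenticator = IAMAuthenticator(apikey)
    tts = TextToSpeechV1(authenticator=authenticator)
    tts.set_service_url(url)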