import torch
import cv2
import pyttsx3

# Load the YOLOv5 nano model via torch.hub (weights are downloaded from GitHub on first use)
model = torch.hub.load('ultralytics/yolov5', 'yolov5n')

# Initialize video capture
cap = cv2.VideoCapture('cars.mp4')

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Simulated GPS location (latitude, longitude)
gps_location = (37.7749, -122.4194)  # Example coordinates for San Francisco

# Speak the given text (runAndWait blocks until speech finishes, pausing the video loop)
def speak(text):
    engine.say(text)
    engine.runAndWait()

while True:
    ret, img = cap.read()
    if not ret:
        break

    # Perform detection; OpenCV frames are BGR, so flip channels to RGB for the model
    result = model(img[..., ::-1])
    print('result: ', result)

    # Convert the detection result to a pandas DataFrame
    data_frame = result.pandas().xyxy[0]
    print('data_frame:')
    print(data_frame)

    # Iterate over all detected objects
    for index in data_frame.index:
        # Top-left corner of the bounding box
        x1 = int(data_frame['xmin'][index])
        y1 = int(data_frame['ymin'][index])
        # Bottom-right corner of the bounding box
        x2 = int(data_frame['xmax'][index])
        y2 = int(data_frame['ymax'][index])

        # Label name and confidence score
        label = data_frame['name'][index]
        conf = data_frame['confidence'][index]
        text = f"{label} {conf:.2f}"

        # Draw the bounding box and label on the image
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
        cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)

        # Context-aware action: announce cars detected with high confidence
        if label == "car" and conf > 0.5:
            # Announce the detected car along with the simulated GPS location
            speak(f"Car detected at GPS location: {gps_location[0]}, {gps_location[1]}")
            # More context-based features could be added here (e.g., alerting, saving data)

    # Display the GPS coordinates on the image
    gps_text = f"GPS: {gps_location[0]:.4f}, {gps_location[1]:.4f}"
    cv2.putText(img, gps_text, (10, 30), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 2)

    # Show the processed frame
    cv2.imshow('IMAGE', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()