import torch
import cv2
import pyttsx3

# Load the YOLOv5 nano model via torch.hub (weights are downloaded from GitHub on first use)
model = torch.hub.load('ultralytics/yolov5', 'yolov5n')

# Initialize video capture
cap = cv2.VideoCapture('cars.mp4')

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Simulated GPS location (latitude, longitude)
gps_location = (37.7749, -122.4194)  # Example coordinates for San Francisco

# Speak the given text (runAndWait blocks until speech finishes, pausing the video loop)
def speak(text):
    engine.say(text)
    engine.runAndWait()

while True:
    ret, img = cap.read()
    if not ret:
        break

    # Perform detection; OpenCV frames are BGR, so flip channels to RGB for the model
    result = model(img[..., ::-1])
    print('result: ', result)

    # Convert the detection result to a pandas DataFrame
    data_frame = result.pandas().xyxy[0]
    print('data_frame:')
    print(data_frame)

    # Iterate over all detected objects
    for index in data_frame.index:
        # Top-left corner of the bounding box
        x1 = int(data_frame['xmin'][index])
        y1 = int(data_frame['ymin'][index])
        # Bottom-right corner of the bounding box
        x2 = int(data_frame['xmax'][index])
        y2 = int(data_frame['ymax'][index])

        # Label name and confidence score
        label = data_frame['name'][index]
        conf = data_frame['confidence'][index]
        text = f"{label} {conf:.2f}"

        # Draw the bounding box and label on the image
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
        cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)

        # Context-aware action: announce cars detected with high confidence
        if label == "car" and conf > 0.5:
            # Announce the detected car along with the simulated GPS location
            speak(f"Car detected at GPS location: {gps_location[0]}, {gps_location[1]}")
            # More context-based features could be added here (e.g., alerting, saving data)

    # Display the GPS coordinates on the image
    gps_text = f"GPS: {gps_location[0]:.4f}, {gps_location[1]:.4f}"
    cv2.putText(img, gps_text, (10, 30), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 2)

    # Show the processed frame
    cv2.imshow('IMAGE', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()