from datetime import datetime, timedelta

import cv2
import gradio as gr
import numpy as np
import simpleaudio as sa
from ultralytics import YOLO

# Load pre-trained YOLOv8 model (replace with custom-trained model)
model = YOLO('yolov8n.pt')
obstacle_classes = ['person', 'car', 'traffic light', 'stop sign']  # Customize with your classes

# Audio alerts
left_alert = sa.WaveObject.from_wave_file("left_beep.wav")
right_alert = sa.WaveObject.from_wave_file("right_beep.wav")


class ObstacleDetector:
    def __init__(self):
        self.last_alert_time = datetime.now()
        self.alert_cooldown = 2  # seconds
        self.recent_obstacles = []

    def detect_obstacles(self, frame):
        results = model(frame)
        detected_obstacles = []
        for box in results[0].boxes:
            class_id = int(box.cls)
            if results[0].names[class_id] in obstacle_classes:
                # Horizontal center of the bounding box decides left/right position
                x_center = float((box.xyxy[0][0] + box.xyxy[0][2]) / 2)
                position = "left" if x_center < frame.shape[1] / 2 else "right"
                detected_obstacles.append({
                    "class": results[0].names[class_id],
                    "position": position,
                    "confidence": float(box.conf)
                })
        return detected_obstacles, results[0].plot()

    def generate_audio_feedback(self, obstacles):
        # Rate-limit alerts so overlapping detections don't spam audio
        if datetime.now() - self.last_alert_time < timedelta(seconds=self.alert_cooldown):
            return
        for obstacle in obstacles:
            if obstacle['position'] == 'left':
                left_alert.play()
                self.last_alert_time = datetime.now()
                break
            elif obstacle['position'] == 'right':
                right_alert.play()
                self.last_alert_time = datetime.now()
                break


detector = ObstacleDetector()


def process_frame(frame, enable_audio):
    if frame is None:
        return None, [], "No vibration"

    # Process the frame
    frame_np = np.array(frame)
    # Convert to BGR for YOLOv8
    frame_bgr = cv2.cvtColor(frame_np, cv2.COLOR_RGB2BGR)
    obstacles, annotated_frame = detector.detect_obstacles(frame_bgr)

    if enable_audio and obstacles:
        detector.generate_audio_feedback(obstacles)

    # Simulate vibration feedback
    vibration = "■" * min(len(obstacles), 5) if obstacles else "No vibration"

    # Convert back to RGB for display
    output_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)

    return output_frame, obstacles, vibration


def create_demo():
    with gr.Blocks(title="CaneVision Demo") as demo:
        gr.Markdown("# 🦯 CaneVision - Intelligent Navigation Assistant")

        with gr.Row():
            with gr.Column():
                # Use gr.Image with webcam source instead of gr.Video
                camera_input = gr.Image(label="Live Camera Input", sources=["webcam"], type="numpy")
                audio_toggle = gr.Checkbox(label="Enable Audio Alerts", value=True)
                process_btn = gr.Button("Process Frame")

            with gr.Column():
                output_image = gr.Image(label="Detected Obstacles")
                vibration_output = gr.Textbox(label="Vibration Feedback Simulation")
                obstacles_output = gr.JSON(label="Detected Objects")

        # Process webcam frame when button is clicked
        process_btn.click(
            process_frame,
            inputs=[camera_input, audio_toggle],
            outputs=[output_image, obstacles_output, vibration_output]
        )

        # Live processing at a fixed interval is not wired up here; explicit button
        # processing keeps the demo predictable. See the streaming sketch at the
        # end of this file for one possible approach.

        gr.Markdown("## How it works:")
        gr.Markdown("""
        1. Capture an image from your webcam
        2. Click 'Process Frame' to analyze
        3. The AI detects obstacles (people, vehicles, traffic signs)
        4. System provides:
           - Spatial audio alerts (left/right)
           - Simulated vibration feedback
           - Visual detection overlay
        """)

    return demo


if __name__ == "__main__":
    demo = create_demo()
    demo.launch(share=True, debug=True)
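

# --- Optional: continuous processing sketch (not launched above) ---
# A minimal sketch of live webcam processing, assuming a Gradio version that
# supports streaming Image inputs (`streaming=True`) and the `.stream()` event;
# the component names and layout here are illustrative, not part of the demo
# above. It reuses `process_frame`; call `create_streaming_demo().launch()`
# yourself if you want to try it.
def create_streaming_demo():
    with gr.Blocks(title="CaneVision Live Demo") as live_demo:
        camera_stream = gr.Image(label="Live Camera", sources=["webcam"], streaming=True, type="numpy")
        audio_toggle = gr.Checkbox(label="Enable Audio Alerts", value=True)
        output_image = gr.Image(label="Detected Obstacles")
        obstacles_output = gr.JSON(label="Detected Objects")
        vibration_output = gr.Textbox(label="Vibration Feedback Simulation")

        # Re-run detection on each streamed frame instead of waiting for a button click;
        # effective frame rate depends on model speed and network latency.
        camera_stream.stream(
            process_frame,
            inputs=[camera_stream, audio_toggle],
            outputs=[output_image, obstacles_output, vibration_output],
        )
    return live_demo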