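# CaneVision demo: YOLOv8 obstacle detection with spatial left/right audio
# alerts and simulated vibration feedback, wrapped in a Gradio interface.
#
# Dependencies (inferred from the imports below; exact versions are an assumption):
#   pip install ultralytics gradio opencv-python numpy simpleaudio
#
# The files left_beep.wav and right_beep.wav must sit next to this script,
# otherwise the WaveObject loading below will raise at import time.
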
from datetime import datetime, timedelta

import cv2
import gradio as gr
import numpy as np
import simpleaudio as sa
from ultralytics import YOLO

# Load a pre-trained YOLOv8 model (swap in a custom-trained checkpoint if you have one)
model = YOLO('yolov8n.pt')
obstacle_classes = ['person', 'car', 'traffic light', 'stop sign']  # adjust to your model's class names

# Audio alerts. Note: simpleaudio plays on the machine running this script,
# so remote visitors to a shared Gradio link will not hear the beeps.
left_alert = sa.WaveObject.from_wave_file("left_beep.wav")
right_alert = sa.WaveObject.from_wave_file("right_beep.wav")

class ObstacleDetector:
    """Runs YOLO detections and issues rate-limited, direction-aware audio alerts."""

    def __init__(self):
        self.last_alert_time = datetime.now()
        self.alert_cooldown = 2  # seconds between audio alerts
        self.recent_obstacles = []  # currently unused

    def detect_obstacles(self, frame):
        """Run YOLO on a BGR frame; return (list of obstacle dicts, annotated frame)."""
        results = model(frame)
        detected_obstacles = []
        
        for box in results[0].boxes:
            class_id = int(box.cls)
            if results[0].names[class_id] in obstacle_classes:
                # Bounding-box midpoint decides left/right relative to the frame centre
                x_center = float((box.xyxy[0][0] + box.xyxy[0][2]) / 2)
                position = "left" if x_center < frame.shape[1] / 2 else "right"
                detected_obstacles.append({
                    "class": results[0].names[class_id],
                    "position": position,
                    "confidence": float(box.conf)
                })
        
        return detected_obstacles, results[0].plot()

    def generate_audio_feedback(self, obstacles):
        """Play a left or right beep for the first obstacle, throttled by the cooldown."""
        if datetime.now() - self.last_alert_time < timedelta(seconds=self.alert_cooldown):
            return

        for obstacle in obstacles:
            if obstacle['position'] == 'left':
                left_alert.play()
                self.last_alert_time = datetime.now()
                break  # one alert per cooldown window to avoid overlapping sounds
            elif obstacle['position'] == 'right':
                right_alert.play()
                self.last_alert_time = datetime.now()
                break

detector = ObstacleDetector()

def process_frame(frame, enable_audio):
    if frame is None:
        return None, [], "No vibration"
    
    # Gradio supplies the webcam frame as an RGB numpy array
    frame_np = np.array(frame)
    # Convert RGB -> BGR, the colour order OpenCV and YOLO's plot() overlay use
    frame_bgr = cv2.cvtColor(frame_np, cv2.COLOR_RGB2BGR)
    
    obstacles, annotated_frame = detector.detect_obstacles(frame_bgr)
    
    if enable_audio and obstacles:
        detector.generate_audio_feedback(obstacles)
    
    # Simulate vibration feedback
    vibration = "■" * min(len(obstacles), 5) if obstacles else "No vibration"
    
    # Convert back to RGB for display
    output_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
    
    return output_frame, obstacles, vibration

def create_demo():
    with gr.Blocks(title="CaneVision Demo") as demo:
        gr.Markdown("# 🦯 CaneVision - Intelligent Navigation Assistant")
        
        with gr.Row():
            with gr.Column():
                # Use gr.Image with webcam source instead of gr.Video
                camera_input = gr.Image(label="Live Camera Input", sources=["webcam"], type="numpy")
                audio_toggle = gr.Checkbox(label="Enable Audio Alerts", value=True)
                process_btn = gr.Button("Process Frame")
                
            with gr.Column():
                output_image = gr.Image(label="Detected Obstacles")
                vibration_output = gr.Textbox(label="Vibration Feedback Simulation")
                obstacles_output = gr.JSON(label="Detected Objects")

        # Process webcam frame when button is clicked
        process_btn.click(
            process_frame,
            inputs=[camera_input, audio_toggle],
            outputs=[output_image, obstacles_output, vibration_output]
        )

        # Optional: live processing via webcam streaming instead of an explicit
        # button click; a commented sketch follows. The button keeps processing
        # explicit and predictable for the demo.
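        # A minimal live-mode sketch, not enabled here. It assumes this Gradio
        # version exposes a `streaming` flag on gr.Image and a `.stream()` event
        # on the component; check your installed version before uncommenting.
        #
        #     camera_input = gr.Image(label="Live Camera Input", sources=["webcam"],
        #                             type="numpy", streaming=True)
        #     camera_input.stream(
        #         process_frame,
        #         inputs=[camera_input, audio_toggle],
        #         outputs=[output_image, obstacles_output, vibration_output],
        #     )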

        gr.Markdown("## How it works:")
        gr.Markdown("""
        1. Capture an image from your webcam
        2. Click 'Process Frame' to analyze
        3. The AI detects obstacles (people, vehicles, traffic signs)
        4. The system provides:
           - Spatial audio alerts (left/right)
           - Simulated vibration feedback
           - Visual detection overlay
        """)
        
        return demo

if __name__ == "__main__":
    demo = create_demo()
    demo.launch(share=True, debug=True)