# CaneVision / app.py
from datetime import datetime, timedelta
import cv2
import gradio as gr
import numpy as np
import simpleaudio as sa
from ultralytics import YOLO
# Load pre-trained YOLOv8 model (replace with custom trained model)
model = YOLO('yolov8n.pt')
obstacle_classes = ['person', 'car', 'traffic light', 'stop sign'] # Customize with your classes
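# With the stock yolov8n.pt weights these names must match the COCO class labels
# ('person', 'car', 'traffic light', 'stop sign' all are); a custom-trained model
# would use its own names dictionary instead.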
# Audio alerts
left_alert = sa.WaveObject.from_wave_file("left_beep.wav")
right_alert = sa.WaveObject.from_wave_file("right_beep.wav")
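# Both beep files are loaded at import time, so they must sit next to app.py.
# A minimal sketch for generating placeholder tones with the standard-library
# wave module (an assumption for local testing, not part of the original app):
#
#   import math, struct, wave
#   def write_beep(path, freq, seconds=0.2, rate=44100):
#       with wave.open(path, "wb") as w:
#           w.setnchannels(1)
#           w.setsampwidth(2)  # 16-bit samples
#           w.setframerate(rate)
#           for i in range(int(rate * seconds)):
#               sample = int(32767 * 0.5 * math.sin(2 * math.pi * freq * i / rate))
#               w.writeframes(struct.pack("<h", sample))
#   write_beep("left_beep.wav", 440.0)   # lower tone for the left side
#   write_beep("right_beep.wav", 880.0)  # higher tone for the right side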
class ObstacleDetector:
    def __init__(self):
        self.last_alert_time = datetime.now()
        self.alert_cooldown = 2  # seconds
        self.recent_obstacles = []
    def detect_obstacles(self, frame):
        results = model(frame)
        detected_obstacles = []
        for box in results[0].boxes:
            class_id = int(box.cls)
            if results[0].names[class_id] in obstacle_classes:
                # The horizontal centre of the bounding box decides left vs. right
                x_center = (box.xyxy[0][0] + box.xyxy[0][2]) / 2
                position = "left" if x_center < frame.shape[1] / 2 else "right"
                detected_obstacles.append({
                    "class": results[0].names[class_id],
                    "position": position,
                    "confidence": float(box.conf)
                })
        return detected_obstacles, results[0].plot()
    def generate_audio_feedback(self, obstacles):
        if datetime.now() - self.last_alert_time < timedelta(seconds=self.alert_cooldown):
            return
        for obstacle in obstacles:
            if obstacle['position'] == 'left':
                left_alert.play()
                self.last_alert_time = datetime.now()
                break
            elif obstacle['position'] == 'right':
                right_alert.play()
                self.last_alert_time = datetime.now()
                break
detector = ObstacleDetector()
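# A single module-level detector is shared by every processed frame, so the
# alert cooldown persists across button clicks.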
def process_frame(frame, enable_audio):
    if frame is None:
        return None, [], "No vibration"
    # Process the frame
    frame_np = np.array(frame)
    # Convert to BGR for YOLOv8
    frame_bgr = cv2.cvtColor(frame_np, cv2.COLOR_RGB2BGR)
    obstacles, annotated_frame = detector.detect_obstacles(frame_bgr)
    if enable_audio and obstacles:
        detector.generate_audio_feedback(obstacles)
    # Simulate vibration feedback
    vibration = "■" * min(len(obstacles), 5) if obstacles else "No vibration"
    # Convert back to RGB for display
    output_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
    return output_frame, obstacles, vibration
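# The function can also be exercised without the UI; a quick standalone check
# (hypothetical, not part of the Gradio flow) could look like:
#
#   test_frame = np.zeros((480, 640, 3), dtype=np.uint8)
#   img, objs, vib = process_frame(test_frame, enable_audio=False)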
def create_demo():
    with gr.Blocks(title="CaneVision Demo") as demo:
        gr.Markdown("# 🦯 CaneVision - Intelligent Navigation Assistant")
        with gr.Row():
            with gr.Column():
                # Use gr.Image with a webcam source instead of gr.Video
                camera_input = gr.Image(label="Live Camera Input", sources=["webcam"], type="numpy")
                audio_toggle = gr.Checkbox(label="Enable Audio Alerts", value=True)
                process_btn = gr.Button("Process Frame")
            with gr.Column():
                output_image = gr.Image(label="Detected Obstacles")
                vibration_output = gr.Textbox(label="Vibration Feedback Simulation")
                obstacles_output = gr.JSON(label="Detected Objects")
        # Process the webcam frame when the button is clicked
        process_btn.click(
            process_frame,
            inputs=[camera_input, audio_toggle],
            outputs=[output_image, obstacles_output, vibration_output]
        )
        # A live, fixed-interval processing loop could be added here; the explicit
        # button keeps processing predictable for this demo.
        gr.Markdown("## How it works:")
        gr.Markdown("""
1. Capture an image from your webcam
2. Click 'Process Frame' to analyze
3. The AI detects obstacles (people, vehicles, traffic signs)
4. The system provides:
   - Spatial audio alerts (left/right)
   - Simulated vibration feedback
   - Visual detection overlay
        """)
    return demo
if __name__ == "__main__":
    demo = create_demo()
    demo.launch(share=True, debug=True)
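    # Note: share=True requests a temporary public Gradio link; debug=True keeps
    # the process attached and prints tracebacks to the console.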