# Source: Hugging Face Space by Prathamesh1420 — "Update app.py" (commit e018b5b, verified)
import gradio as gr
from PIL import Image
import os
from utils import describe_image, text_to_speech, test_tts
print("πŸš€ Initializing Blind Vision Assistant...")

# Probe the text-to-speech pipeline once at startup so audio problems
# show up in the logs before the UI is ever served.
print("πŸ§ͺ Testing audio system...")
if test_tts():
    print("βœ… Audio system is working!")
else:
    print("⚠️ Audio system may have issues")
class BlindVisionAssistant:
    """Glue between the vision-description and TTS helpers and the UI."""

    def __init__(self):
        # True while an image is being described; cleared on every exit
        # path of process_surroundings.
        self.is_processing = False

    def process_surroundings(self, image):
        """
        Describe *image* and synthesize an audio narration of it.

        Parameters
        ----------
        image : str | None
            Filepath of the uploaded image (Gradio ``type="filepath"``),
            or ``None`` when nothing was uploaded.

        Returns
        -------
        tuple
            ``(audio_path_or_None, text_description, status_message)`` —
            matches the three Gradio output components.
        """
        print("πŸ“Έ Processing image...")
        if image is None:
            print("❌ No image provided")
            return None, "Please upload an image first.", "⚠️ No image provided"
        self.is_processing = True
        try:
            print("πŸ”„ Generating description...")
            # Get description from vision model
            description = describe_image(image)
            print(f"πŸ“ Description: {description}")
            # Convert description to speech
            print("πŸ”Š Converting to speech...")
            audio_path = text_to_speech(description)
            if audio_path and os.path.exists(audio_path):
                file_size = os.path.getsize(audio_path)
                print(f"βœ… Audio file created: {audio_path} ({file_size} bytes)")
                return audio_path, f"🎯 {description}", "βœ… Description ready! Click play to listen."
            # TTS produced nothing usable; still surface the text description.
            print("❌ Audio file was not created")
            return None, f"🎯 {description}", "⚠️ Description generated but audio failed. Read the text above."
        except Exception as e:
            # Broad catch is deliberate: this is the UI boundary and any
            # failure must surface as a status message, not a traceback.
            print(f"❌ Error: {str(e)}")
            return None, f"❌ Error: {str(e)}", "❌ Processing failed"
        finally:
            # Single guaranteed reset replaces the three manual resets the
            # original scattered across its return paths.
            self.is_processing = False
def create_interface():
    """Build and return the Gradio Blocks UI for the assistant.

    Layout: header/credits markdown, usage instructions, then a two-column
    row — image input plus action buttons on the left, audio/text/status
    outputs on the right. Both buttons are wired to a single shared
    BlindVisionAssistant instance.

    NOTE(review): the nesting order of the Row/Column context managers
    defines the rendered layout — do not reorder.
    """
    # One shared assistant serves every click event.
    assistant = BlindVisionAssistant()
    with gr.Blocks(title="Blind Vision Assistant - See with Sound") as demo:
        # Header Section with Credits
        gr.Markdown("""
# πŸ‘οΈβ€πŸ—¨οΈ Blind Vision Assistant
### *Helping visually impaired people understand their surroundings through sound*
**Created by: Pritam Mahesh Phalake**
*Under the guidance of: Seema Uttam Jangam*
---
""")
        # Instructions
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
### πŸ“‹ How to Use:
1. **Upload an image** using the upload button below
2. Click the **'Describe Image'** button
3. **Listen** to the audio description
4. **Read** the text description below
*Note: Audio may take a few seconds to generate*
""")
        # Main Content
        with gr.Row():
            # Left Column - Input
            with gr.Column():
                gr.Markdown("### πŸ“· Image Input")
                # type="filepath" so process_surroundings receives a path string.
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    interactive=True,
                    height=300
                )
                with gr.Row():
                    process_btn = gr.Button(
                        "🎀 Describe Image",
                        variant="primary",
                        size="lg"
                    )
                    clear_btn = gr.Button(
                        "πŸ”„ Clear",
                        variant="secondary"
                    )
            # Right Column - Output
            with gr.Column():
                gr.Markdown("### πŸ”Š Audio Output")
                # Playback-only: the audio file comes from text_to_speech().
                audio_output = gr.Audio(
                    label="Audio Description",
                    interactive=False,
                    type="filepath"
                )
                gr.Markdown("### πŸ“ Text Description")
                text_output = gr.Textbox(
                    label="Image Description",
                    placeholder="Description will appear here after processing...",
                    lines=4,
                    interactive=False
                )
                status_output = gr.Textbox(
                    label="Status",
                    value="🟒 Ready - Upload an image above",
                    interactive=False
                )
        gr.Markdown("---")
        gr.Markdown("""
<div style='text-align: center'>
<p>Built with ❀️ for accessibility | Uses BLIP AI</p>
</div>
""")
        # Event Handlers
        def clear_all():
            # Reset all four wired outputs back to their initial state.
            return None, None, None, "🟒 Ready - Upload an image above"
        # Connect buttons — output order must match the handlers' return order.
        process_btn.click(
            fn=assistant.process_surroundings,
            inputs=[image_input],
            outputs=[audio_output, text_output, status_output]
        )
        clear_btn.click(
            fn=clear_all,
            inputs=[],
            outputs=[image_input, audio_output, text_output, status_output]
        )
    return demo
if __name__ == "__main__":
    # Entry point: build the UI and serve it.
    print("πŸš€ Starting Blind Vision Assistant...")
    print("πŸ“Έ Please upload an image to get started")
    app = create_interface()
    # Bind to all interfaces on port 7860 (the Hugging Face Spaces default).
    app.launch(server_name="0.0.0.0", server_port=7860)