Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from PIL import Image | |
| import os | |
| from utils import describe_image, text_to_speech, test_tts | |
# Announce startup and verify the audio pipeline before building the UI.
print("π Initializing Blind Vision Assistant...")

# Test TTS on startup so audio problems surface immediately in the logs.
print("π§ͺ Testing audio system...")
tts_working = test_tts()
audio_status = "β Audio system is working!" if tts_working else "β οΈ Audio system may have issues"
print(audio_status)
class BlindVisionAssistant:
    """Coordinates image description and text-to-speech for the Gradio UI."""

    def __init__(self):
        # Busy flag; True while a request is being processed.
        self.is_processing = False

    def process_surroundings(self, image):
        """
        Describe *image* and synthesize an audio narration of it.

        Parameters
        ----------
        image : str | None
            Filepath of the uploaded image (the Gradio component uses
            ``type="filepath"``), or ``None`` when nothing was uploaded.

        Returns
        -------
        tuple
            ``(audio_path, description_text, status_text)``;
            ``audio_path`` is ``None`` when no audio could be produced.
        """
        print("πΈ Processing image...")
        if image is None:
            print("β No image provided")
            return None, "Please upload an image first.", "β οΈ No image provided"
        # try/finally guarantees the busy flag is cleared even if something
        # other than Exception escapes (e.g. KeyboardInterrupt); the previous
        # per-branch resets could leave it stuck at True in that case.
        self.is_processing = True
        try:
            print("π Generating description...")
            # Get description from vision model
            description = describe_image(image)
            print(f"π Description: {description}")
            # Convert description to speech
            print("π Converting to speech...")
            audio_path = text_to_speech(description)
            if audio_path and os.path.exists(audio_path):
                file_size = os.path.getsize(audio_path)
                print(f"β Audio file created: {audio_path} ({file_size} bytes)")
                return audio_path, f"π― {description}", "β Description ready! Click play to listen."
            # Fall back to text-only output when TTS produced no file.
            print("β Audio file was not created")
            return None, f"π― {description}", "β οΈ Description generated but audio failed. Read the text above."
        except Exception as e:
            print(f"β Error: {str(e)}")
            error_msg = f"β Error: {str(e)}"
            return None, error_msg, "β Processing failed"
        finally:
            self.is_processing = False
def create_interface():
    """Build and return the Gradio Blocks UI (not yet launched).

    Wires a single BlindVisionAssistant instance to the upload /
    describe / clear controls.
    """
    assistant = BlindVisionAssistant()
    with gr.Blocks(title="Blind Vision Assistant - See with Sound") as demo:
        # Header Section with Credits
        gr.Markdown("""
        # ποΈβπ¨οΈ Blind Vision Assistant
        ### *Helping visually impaired people understand their surroundings through sound*
        **Created by: Pritam Mahesh Phalake**
        *Under the guidance of: Seema Uttam Jangam*
        ---
        """)
        # Instructions
        with gr.Row():
            with gr.Column():
                gr.Markdown("""
                ### π How to Use:
                1. **Upload an image** using the upload button below
                2. Click the **'Describe Image'** button
                3. **Listen** to the audio description
                4. **Read** the text description below
                *Note: Audio may take a few seconds to generate*
                """)
        # Main Content
        with gr.Row():
            # Left Column - Input
            with gr.Column():
                gr.Markdown("### π· Image Input")
                # type="filepath" hands process_surroundings a path string.
                image_input = gr.Image(
                    label="Upload Image",
                    type="filepath",
                    interactive=True,
                    height=300
                )
                with gr.Row():
                    process_btn = gr.Button(
                        "π€ Describe Image",
                        variant="primary",
                        size="lg"
                    )
                    clear_btn = gr.Button(
                        "π Clear",
                        variant="secondary"
                    )
            # Right Column - Output
            with gr.Column():
                gr.Markdown("### π Audio Output")
                audio_output = gr.Audio(
                    label="Audio Description",
                    interactive=False,
                    type="filepath"
                )
                gr.Markdown("### π Text Description")
                text_output = gr.Textbox(
                    label="Image Description",
                    placeholder="Description will appear here after processing...",
                    lines=4,
                    interactive=False
                )
                status_output = gr.Textbox(
                    label="Status",
                    value="π’ Ready - Upload an image above",
                    interactive=False
                )
        gr.Markdown("---")
        gr.Markdown("""
        <div style='text-align: center'>
        <p>Built with β€οΈ for accessibility | Uses BLIP AI</p>
        </div>
        """)

        # Event Handlers
        def clear_all():
            # Reset image, audio, text, and status back to the idle state.
            return None, None, None, "π’ Ready - Upload an image above"

        # Connect buttons: outputs map positionally to the returned tuples.
        process_btn.click(
            fn=assistant.process_surroundings,
            inputs=[image_input],
            outputs=[audio_output, text_output, status_output]
        )
        clear_btn.click(
            fn=clear_all,
            inputs=[],
            outputs=[image_input, audio_output, text_output, status_output]
        )
    return demo
if __name__ == "__main__":
    print("π Starting Blind Vision Assistant...")
    print("πΈ Please upload an image to get started")
    demo = create_interface()
    # Bind to all interfaces on port 7860 (Gradio's conventional port,
    # presumably for a hosted-Spaces deployment — confirm for local use).
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860
    )