import gradio as gr
from gtts import gTTS
import os
import speech_recognition as sr
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from PIL import Image
import cv2
from pydub import AudioSegment
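
# Runtime notes (assumptions about the deployment environment, not part of the
# original code): pydub needs an ffmpeg binary on PATH to decode MP3 uploads,
# recognize_google() sends audio to Google's free web API and therefore needs
# network access, and the BLIP weights are downloaded from the Hugging Face Hub
# on first use.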
# Text-to-Speech function
def text_to_speech(text):
    tts = gTTS(text=text, lang='en', slow=False)
    filename = "output.mp3"
    tts.save(filename)
    return filename
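
# Example (assumed local usage): text_to_speech("Hello world") returns the path
# "output.mp3". Because the filename is fixed, concurrent requests overwrite
# each other's audio; that is acceptable for a single-user demo.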
# Speech-to-Text function
def speech_to_text(audio):
    if audio is None:
        return "Please record or upload an audio file."
    recognizer = sr.Recognizer()
    # Check if the uploaded file is an MP3
    if audio.endswith('.mp3'):
        # Convert MP3 to WAV so SpeechRecognition can read it
        audio_segment = AudioSegment.from_mp3(audio)
        wav_file = "temp.wav"
        audio_segment.export(wav_file, format="wav")
        audio = wav_file  # Update audio to the converted file
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
            return text
    except sr.UnknownValueError:
        return "Sorry, I could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results; {e}"
    except Exception as e:
        return f"An error occurred: {e}"
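
# Example (assumed local usage): speech_to_text("sample.wav") returns the
# transcript string, or one of the error messages above. recognize_google()
# uses Google's web speech API, so transcription only works while online.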
# Image Description function
def generate_image_description(image):
    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
    inputs = processor(images=image, return_tensors="pt")
    out = model.generate(**inputs)
    description = processor.decode(out[0], skip_special_tokens=True)
    return description
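
# A minimal caching sketch (assumption: repeated captioning requests make the
# per-call model reload above a bottleneck). Loading the BLIP weights once at
# module level and reusing them inside generate_image_description would avoid
# reloading on every request, e.g.:
#
#     _blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
#     _blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
#
# The names _blip_processor / _blip_model are illustrative and not part of the
# original app.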
# Video Description function
def generate_video_description(video):
    if video is None:
        return "Please upload a video file."
    # gr.File may hand back a plain file path or an object with a .name
    # attribute, depending on the Gradio version; handle both.
    video_path = video if isinstance(video, str) else video.name
    cap = cv2.VideoCapture(video_path)
    descriptions = []
    if not cap.isOpened():
        return "Error opening video file."
    frame_count = 0
    while frame_count < 5:  # Limit to the first 5 frames for this example
        ret, frame = cap.read()
        if not ret:
            break
        # OpenCV yields BGR frames; convert to RGB before captioning
        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        description = generate_image_description(image)
        descriptions.append(description)
        frame_count += 1
    cap.release()
    # Join into one string so the result renders cleanly in a Textbox
    return "\n".join(descriptions) if descriptions else "No frames to describe."
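
# A sampling sketch (assumption: evenly spaced frames are more informative than
# the first five, which are usually near-identical). One way, using only the
# cv2 API already imported above:
#
#     total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
#     for idx in range(0, total, max(total // 5, 1)):
#         cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
#         ret, frame = cap.read()
#
# The 5-frame budget and step size are illustrative choices, not part of the
# original app.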
# Gradio Interface
def main():
    with gr.Blocks() as app:
        gr.Markdown("<h1 style='text-align: center; color: #1e90ff;'>AI-Powered Accessibility Tools</h1>")

        # Text-to-Speech Section
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("<div style='background-color: #f0f8ff; padding: 20px; border-radius: 8px;'>"
                            "<h2>Text-to-Speech</h2>"
                            "<ul>"
                            "<li><strong>Core Idea:</strong> Create natural-sounding speech from text input.</li>"
                            "<li><strong>Functionality:</strong> Converts written text into spoken words, helping individuals with reading difficulties or visual impairments.</li>"
                            "<li><strong>Target Audience:</strong> People with visual impairments, reading disabilities, and those who prefer audio content.</li>"
                            "</ul>"
                            "<strong>Supported Input:</strong> Plain text. <br>"
                            "<strong>Output:</strong> MP3 audio file."
                            "</div>")
                text_input = gr.Textbox(label="Enter text for Text-to-Speech", placeholder="Type your text here...")
                tts_button = gr.Button("Convert to Speech")
                tts_output = gr.Audio(label="TTS Output")
                tts_button.click(fn=text_to_speech, inputs=text_input, outputs=tts_output)
            # Speech-to-Text Section
            with gr.Column(scale=1):
                gr.Markdown("<div style='background-color: #e6ffe6; padding: 20px; border-radius: 8px;'>"
                            "<h2>Speech-to-Text</h2>"
                            "<ul>"
                            "<li><strong>Core Idea:</strong> Convert spoken language into written text.</li>"
                            "<li><strong>Functionality:</strong> Allows users to dictate speech and have it transcribed into text, facilitating communication and documentation.</li>"
                            "<li><strong>Target Audience:</strong> Individuals with hearing impairments, those who prefer speaking over typing, and people with mobility challenges.</li>"
                            "</ul>"
                            "<strong>Supported Input:</strong> WAV, FLAC, AIFF, MP3 (converted to WAV). <br>"
                            "<strong>Output:</strong> Transcribed text."
                            "</div>")
                stt_input = gr.Audio(label="Record or Upload Audio", type="filepath")
                stt_button = gr.Button("Convert Speech to Text")
                stt_output = gr.Textbox(label="Speech-to-Text Output")
                stt_button.click(fn=speech_to_text, inputs=stt_input, outputs=stt_output)
        # Image Description Section
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("<div style='background-color: #ffe6e6; padding: 20px; border-radius: 8px;'>"
                            "<h2>Image Description</h2>"
                            "<ul>"
                            "<li><strong>Core Idea:</strong> Generate descriptive text for images.</li>"
                            "<li><strong>Functionality:</strong> Analyzes and describes the content of images, making visual information accessible to those who are visually impaired.</li>"
                            "<li><strong>Target Audience:</strong> Individuals with visual impairments and those needing assistance in understanding visual content.</li>"
                            "</ul>"
                            "<strong>Supported Input:</strong> JPEG, PNG, BMP, GIF. <br>"
                            "<strong>Output:</strong> Text description."
                            "</div>")
                image_input = gr.Image(label="Upload an Image")
                image_desc_output = gr.Textbox(label="Image Description")
                image_desc_button = gr.Button("Describe Image")
                image_desc_button.click(fn=generate_image_description, inputs=image_input, outputs=image_desc_output)
            # Video Description Section
            with gr.Column(scale=1):
                gr.Markdown("<div style='background-color: #fff3e6; padding: 20px; border-radius: 8px;'>"
                            "<h2>Video Description</h2>"
                            "<ul>"
                            "<li><strong>Core Idea:</strong> Describe video content through generated text.</li>"
                            "<li><strong>Functionality:</strong> Provides textual descriptions of video frames, aiding understanding for those who cannot see the video.</li>"
                            "<li><strong>Target Audience:</strong> Individuals with visual impairments and those needing assistance in interpreting video content.</li>"
                            "</ul>"
                            "<strong>Supported Input:</strong> MP4, AVI, MOV. <br>"
                            "<strong>Output:</strong> Text descriptions of the sampled frames (one per line)."
                            "</div>")
                video_input = gr.File(label="Upload a Video", file_types=[".mp4", ".avi", ".mov"])
                video_desc_output = gr.Textbox(label="Video Descriptions")
                video_desc_button = gr.Button("Describe Video")
                video_desc_button.click(fn=generate_video_description, inputs=video_input, outputs=video_desc_output)

    app.launch()

if __name__ == "__main__":
    main()
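
# Assumed dependencies (not listed in this file): gradio, gTTS,
# SpeechRecognition, transformers, torch, opencv-python, pydub, and Pillow,
# plus an ffmpeg binary for pydub's MP3 decoding. On Hugging Face Spaces these
# would typically be pinned in requirements.txt (and packages.txt for ffmpeg).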