Spaces:
Running
Running
import gradio as gr | |
from gtts import gTTS | |
import speech_recognition as sr | |
from difflib import SequenceMatcher | |
import tempfile | |
import os | |
def tts(word):
    """Synthesize ``word`` as English speech and return the MP3 file path.

    Args:
        word: Text to speak; must contain at least one non-whitespace
            character.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        deleted here; the caller owns it.

    Raises:
        ValueError: If ``word`` is empty, ``None`` or whitespace-only.
    """
    if not word or not word.strip():
        raise ValueError("No text to speak")

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which leaves a window for another process to claim the path.
    fd, temp_file_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # the path is handed to gTTS.save(); our descriptor is not needed
    try:
        gTTS(text=word, lang='en').save(temp_file_path)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(temp_file_path)
        raise
    return temp_file_path
def recognize_speech_from_microphone(audio_path):
    """Transcribe an audio file using Google's speech recognition service.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The recognized text on success. On failure, a human-readable message
        that always starts with ``"Could not"`` or ``"Error"``, so callers
        that look for those markers can tell a failure from a transcript.
    """
    if not audio_path:
        # No path to read (e.g. ``None`` or an empty string).
        return "Error: no audio was provided"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        # Prefix the message: a bare str(e) frequently contains neither
        # marker and would be mistaken for recognized speech.
        return f"Error: {e}"
def calculate_similarity(word, recognized_text):
    """Return the case-insensitive similarity of two strings as a percentage.

    Args:
        word: The expected text.
        recognized_text: The text to compare against ``word``.

    Returns:
        ``difflib.SequenceMatcher`` ratio of the lower-cased strings,
        scaled to the range 0-100.
    """
    expected = word.lower()
    heard = recognized_text.lower()
    matcher = SequenceMatcher(None, expected, heard)
    return matcher.ratio() * 100
def process_audio(word, audio_path):
    """Transcribe ``audio_path`` and score the transcript against ``word``.

    Args:
        word: The expected text.
        audio_path: Path of the recorded audio file.

    Returns:
        A ``(text, similarity)`` tuple. When the transcript contains one of
        the failure markers ("Error" or "Could not"), the text is returned
        unchanged with a similarity of ``0.0``.
    """
    transcript = recognize_speech_from_microphone(audio_path)

    # The recognizer reports failures in-band as message strings.
    failure_markers = ("Error", "Could not")
    if any(marker in transcript for marker in failure_markers):
        return transcript, 0.0

    return transcript, calculate_similarity(word, transcript)
def evaluate_pronunciation(word):
    """Return the path of an audio file speaking ``word``.

    Thin wrapper around ``tts`` used as the "Listen to the word" button
    callback.
    """
    return tts(word)
def process_all(word, audio_path):
    """Return ``(recognized_text, similarity)`` for the given recording.

    Thin wrapper around ``process_audio`` used as the "Evaluate
    Pronunciation" button callback.
    """
    text, score = process_audio(word, audio_path)
    return text, score
# Gradio UI: type a word, listen to its reference pronunciation, provide your
# own audio, and get the recognized text plus a similarity score.
# NOTE(review): the row grouping below is reconstructed, since the original
# indentation was not available - confirm it matches the intended layout.
with gr.Blocks() as demo:
    with gr.Row():
        word_input = gr.Textbox(label="Enter the word for pronunciation")
        tts_button = gr.Button("Listen to the word")
        tts_audio = gr.Audio(label="Original Pronunciation", type="filepath")

    with gr.Row():
        mic_input = gr.Audio(label="Your Pronunciation", type="filepath")
        result_button = gr.Button("Evaluate Pronunciation")
        recognized_text = gr.Textbox(label="Recognized Text")
        similarity_score = gr.Number(label="Similarity (%)")

    # Event listeners are registered inside the Blocks context.
    tts_button.click(evaluate_pronunciation, inputs=word_input, outputs=tts_audio)
    result_button.click(
        process_all,
        inputs=[word_input, mic_input],
        outputs=[recognized_text, similarity_score],
    )

# Start the server only when run as a script, so importing this module
# (tests, tooling) does not launch the app.
if __name__ == "__main__":
    demo.launch()