# mypronunciation / app.py
# Uploaded by englissi - commit 280ab37 ("Create app.py"), verified.
# File-viewer residue: raw | history | blame | contribute | delete | No virus | 2.23 kB
import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from difflib import SequenceMatcher
import tempfile
import os
def tts(word):
    """Synthesize *word* as English speech and return the path of the MP3.

    Args:
        word: Text to be spoken.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        deleted here; the caller is responsible for it.

    Raises:
        Any exception raised by ``gTTS`` while building or saving the
        speech. The temporary file is removed before the exception
        propagates.
    """
    # mkstemp creates the file atomically. tempfile.mktemp only hands back a
    # name, is deprecated, and leaves a window in which another process can
    # claim the same path.
    fd, temp_file_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # only the path is needed; save() writes to it by name
    try:
        gTTS(text=word, lang='en').save(temp_file_path)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(temp_file_path)
        raise
    return temp_file_path
def recognize_speech_from_microphone(audio_path):
    """Transcribe the audio file at *audio_path* via Google Speech Recognition.

    Args:
        audio_path: Path of an audio file to open with ``sr.AudioFile``.
            May be ``None`` or empty when nothing was recorded.

    Returns:
        The recognized text on success. On failure, a human-readable
        message that begins with ``"Could not"`` or ``"Error"``; no
        exception is raised.
    """
    # Nothing recorded yet: report it explicitly instead of letting
    # sr.AudioFile fail on a missing path.
    if not audio_path:
        return "Error: no audio was provided"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        # Prefix the message so that failure checks looking for "Error"
        # do not mistake an exception text for a transcript.
        return f"Error: {e}"
def calculate_similarity(word, recognized_text):
    """Return how closely *recognized_text* matches *word*, as a percentage.

    Both strings are normalized before comparison: surrounding whitespace
    is dropped, runs of whitespace collapse to one space, and case is
    ignored. ``None`` is treated as an empty string.

    Args:
        word: The expected text.
        recognized_text: The text to compare against *word*.

    Returns:
        A float from 0.0 to 100.0 derived from
        ``difflib.SequenceMatcher.ratio()``. Returns 0.0 when either
        normalized string is empty, since there is nothing to compare.
    """
    expected = " ".join((word or "").split()).casefold()
    heard = " ".join((recognized_text or "").split()).casefold()
    # SequenceMatcher rates two empty strings as a perfect match, which
    # would report 100% for no input at all.
    if not expected or not heard:
        return 0.0
    return SequenceMatcher(None, expected, heard).ratio() * 100
def process_audio(word, audio_path):
    """Transcribe *audio_path* and score the transcript against *word*.

    Returns:
        A ``(text, score)`` tuple. When the transcript contains one of the
        failure markers ``"Error"`` or ``"Could not"``, the text is passed
        through unchanged and the score is ``0.0``; otherwise the score is
        whatever ``calculate_similarity`` returns.
    """
    transcript = recognize_speech_from_microphone(audio_path)
    failure_markers = ("Error", "Could not")
    if any(marker in transcript for marker in failure_markers):
        return transcript, 0.0
    return transcript, calculate_similarity(word, transcript)
def evaluate_pronunciation(word):
    """Produce the reference pronunciation audio for *word*.

    Args:
        word: Text typed by the user; may be empty or ``None``.

    Returns:
        Whatever ``tts(word)`` returns (the path of the generated audio),
        or ``None`` when *word* is blank so that no synthesis is attempted.
    """
    # A blank entry has nothing to speak; skip the TTS call rather than
    # letting it fail on empty text.
    if not word or not word.strip():
        return None
    return tts(word)
def process_all(word, audio_path):
    """Run ``process_audio`` and hand back its two results as a tuple.

    Returns:
        ``(text, score)`` exactly as produced by ``process_audio``.
    """
    outcome = process_audio(word, audio_path)
    text, score = outcome
    return (text, score)
# Gradio UI: one row for the reference pronunciation, one row for the
# user's attempt and its evaluation.
# NOTE(review): the source lost its indentation, so the nesting of the
# components inside each Row is reconstructed from their order - confirm.
with gr.Blocks() as demo:
    with gr.Row():
        word_input = gr.Textbox(label="Enter the word for pronunciation")
        tts_button = gr.Button("Listen to the word")
        tts_audio = gr.Audio(label="Original Pronunciation", type="filepath")
    with gr.Row():
        mic_input = gr.Audio(label="Your Pronunciation", type="filepath")
        result_button = gr.Button("Evaluate Pronunciation")
        recognized_text = gr.Textbox(label="Recognized Text")
        similarity_score = gr.Number(label="Similarity (%)")

    # The typed word is turned into reference audio.
    tts_button.click(evaluate_pronunciation, inputs=word_input, outputs=tts_audio)
    # The typed word and the user's audio produce a transcript and a score.
    result_button.click(
        process_all,
        inputs=[word_input, mic_input],
        outputs=[recognized_text, similarity_score],
    )

# Start the server only when run as a script, so importing this module
# (for tests or reuse of the helpers) has no side effect.
if __name__ == "__main__":
    demo.launch()