import gradio as gr
from gtts import gTTS
import speech_recognition as sr
from difflib import SequenceMatcher
import tempfile
import os

def tts(word):
    """Synthesize *word* as English speech and return the path of the MP3.

    The audio is written to a newly created temporary ``.mp3`` file that is
    left on disk for the caller to consume.

    Args:
        word: Text to be spoken.

    Returns:
        Filesystem path of the generated MP3 file.

    Raises:
        Any exception raised while constructing the gTTS object or while
        saving the audio is propagated; a partially written temporary file
        is removed first.
    """
    speech = gTTS(text=word, lang='en')
    # tempfile.mktemp() only invents a name, leaving a window in which
    # another process can create that path. NamedTemporaryFile creates the
    # file itself; delete=False keeps it after the handle is closed.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        temp_file_path = handle.name
    # The handle is closed before saving so the path can be reopened by
    # name on every platform.
    try:
        speech.save(temp_file_path)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        os.remove(temp_file_path)
        raise
    return temp_file_path

def recognize_speech_from_microphone(audio_path):
    """Transcribe the audio file at *audio_path* via Google Speech Recognition.

    Failures are reported as a message string rather than raised. Every
    failure message contains either ``"Could not"`` or ``"Error"``, which is
    what ``process_audio`` looks for to tell a failure from a transcript.

    Args:
        audio_path: Path of the recorded audio file, or ``None``/empty when
            nothing was recorded.

    Returns:
        The recognized text on success, otherwise a human-readable error
        message.
    """
    # No recording: report it directly instead of letting the audio reader
    # fail with an unrelated internal message.
    if not audio_path:
        return "Error: no audio was provided"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        # The file is no longer needed once the samples are in memory, so
        # the recognition request runs after it has been closed.
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        # Prefix unexpected failures so callers can recognise them as errors;
        # the bare exception text could be mistaken for a transcript.
        return f"Error: {e}"

def calculate_similarity(word, recognized_text):
    """Return the case-insensitive similarity of two strings as a percentage.

    Both strings are lower-cased and compared with
    ``difflib.SequenceMatcher``; its ratio is scaled to the 0-100 range.
    """
    expected = word.lower()
    heard = recognized_text.lower()
    matcher = SequenceMatcher(None, expected, heard)
    return matcher.ratio() * 100

def process_audio(word, audio_path):
    """Transcribe a recording and score it against the expected *word*.

    Args:
        word: The text the speaker was asked to pronounce.
        audio_path: Path of the recorded audio file, or ``None``/empty when
            nothing was recorded.

    Returns:
        A ``(text, similarity)`` tuple. ``text`` is the transcript, or an
        error message when there was no audio or recognition failed.
        ``similarity`` is a percentage and is ``0.0`` on any failure.
    """
    # Without a recording there is nothing to transcribe. Passing the missing
    # path on would yield an internal failure message that the check below
    # does not recognise, and that message would then be scored as if it
    # were the transcript.
    if not audio_path:
        return "Error: no audio was provided", 0.0

    recognized_text = recognize_speech_from_microphone(audio_path)
    # The recogniser reports failures as message strings; these markers
    # identify them.
    if "Error" in recognized_text or "Could not" in recognized_text:
        return recognized_text, 0.0
    similarity = calculate_similarity(word, recognized_text)
    return recognized_text, similarity

def evaluate_pronunciation(word):
    """Generate the reference pronunciation of *word* and return its file path."""
    return tts(word)

def process_all(word, audio_path):
    """Return the transcript and similarity score for a recording of *word*.

    Delegates to ``process_audio`` and hands back its two results as a
    ``(text, score)`` tuple.
    """
    text, score = process_audio(word, audio_path)
    return (text, score)

# Build the Gradio interface: enter a word, listen to its generated
# pronunciation, provide your own audio, and get a similarity score.
with gr.Blocks() as demo:
    # First row: the target word and the button that generates its audio.
    with gr.Row():
        word_input = gr.Textbox(label="Enter the word for pronunciation")
        tts_button = gr.Button("Listen to the word")
    # Receives the file path returned by evaluate_pronunciation.
    tts_audio = gr.Audio(label="Original Pronunciation", type="filepath")

    # Second row: the user's audio and the button that triggers scoring.
    with gr.Row():
        # Its value is passed to process_all as audio_path.
        mic_input = gr.Audio(label="Your Pronunciation", type="filepath")
        result_button = gr.Button("Evaluate Pronunciation")

    # Outputs of process_all: the transcript (or an error message) and the
    # similarity percentage.
    recognized_text = gr.Textbox(label="Recognized Text")
    similarity_score = gr.Number(label="Similarity (%)")

    # Wire each button to its callback.
    tts_button.click(evaluate_pronunciation, inputs=word_input, outputs=tts_audio)
    result_button.click(process_all, inputs=[word_input, mic_input], outputs=[recognized_text, similarity_score])

# Start the app; this runs whenever the module is executed or imported.
demo.launch()