englissi committed on
Commit
280ab37
1 Parent(s): 4959607

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ from gtts import gTTS
4
+ import speech_recognition as sr
5
+ from difflib import SequenceMatcher
6
+ import tempfile
7
+ import os
8
+
9
def tts(word):
    """Synthesize *word* as English speech and return the path of the MP3.

    Args:
        word: Text to speak; handed to gTTS as ``text`` with ``lang='en'``.

    Returns:
        Path of a newly created temporary ``.mp3`` file. The file is not
        removed here, so the caller is responsible for its lifetime.

    Raises:
        Whatever ``gTTS`` or ``gTTS.save`` raises; the placeholder file is
        removed before the exception propagates.
    """
    speech = gTTS(text=word, lang='en')
    # tempfile.mktemp() only invents a name, which leaves a window in which
    # another process can create that path first. NamedTemporaryFile creates
    # the file atomically; delete=False keeps it on disk once the handle is
    # closed so that gTTS can reopen the path for writing.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as handle:
        temp_file_path = handle.name
    try:
        speech.save(temp_file_path)
    except Exception:
        # Do not leave an empty placeholder behind when synthesis fails.
        try:
            os.remove(temp_file_path)
        except OSError:
            pass
        raise
    return temp_file_path
14
+
15
def recognize_speech_from_microphone(audio_path):
    """Transcribe the audio file at *audio_path* with Google Speech Recognition.

    Args:
        audio_path: Filesystem path of the recording to transcribe. May be
            ``None`` or empty when no recording was supplied.

    Returns:
        The recognized text on success. On failure, a human-readable message
        that starts with either ``"Could not"`` or ``"Error"``; no exception
        is raised to the caller.
    """
    # Without a recording there is nothing to open; report it explicitly
    # instead of surfacing an opaque low-level exception message.
    if not audio_path:
        return "Error: no audio was provided"

    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand the audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
    except Exception as e:
        # process_audio() tells failures from transcripts by looking for
        # "Error" / "Could not" in the returned string, so an unexpected
        # failure must carry one of those markers or it would be scored as
        # if it were recognized speech.
        return f"Error: {e}"
28
+
29
def calculate_similarity(word, recognized_text):
    """Return how closely *recognized_text* matches *word*, as a percentage.

    The comparison is case-insensitive and ignores leading/trailing
    whitespace, so a stray space in the typed word or the transcript does not
    lower the score of an otherwise exact match.

    Args:
        word: The reference text.
        recognized_text: The text to compare against the reference.

    Returns:
        ``difflib.SequenceMatcher`` ratio of the two normalized strings,
        scaled to the range 0-100.
    """
    expected = word.strip().lower()
    heard = recognized_text.strip().lower()
    return SequenceMatcher(None, expected, heard).ratio() * 100
31
+
32
def process_audio(word, audio_path):
    """Transcribe a recording and score it against the expected word.

    Args:
        word: The reference text the recording is compared with.
        audio_path: Path of the recording, forwarded to
            ``recognize_speech_from_microphone``.

    Returns:
        A ``(text, score)`` tuple. When the transcription result contains
        ``"Error"`` or ``"Could not"`` it is treated as a failure message and
        returned with a score of ``0.0``; otherwise the score comes from
        ``calculate_similarity``.
    """
    transcript = recognize_speech_from_microphone(audio_path)

    # The recognizer reports problems as plain strings, so failures are
    # detected by looking for the marker phrases inside the returned text.
    failure_markers = ("Error", "Could not")
    if any(marker in transcript for marker in failure_markers):
        return transcript, 0.0

    return transcript, calculate_similarity(word, transcript)
38
+
39
def evaluate_pronunciation(word):
    """Produce the reference pronunciation audio for *word*.

    Args:
        word: Text to be spoken.

    Returns:
        The file path returned by ``tts(word)``.
    """
    return tts(word)
42
+
43
def process_all(word, audio_path):
    """Run the full evaluation for one recording.

    Args:
        word: The reference text.
        audio_path: Path of the recording to evaluate.

    Returns:
        The ``(text, score)`` pair produced by ``process_audio``.
    """
    text, score = process_audio(word, audio_path)
    return (text, score)
46
+
47
# Gradio interface: one row for entering a word and hearing it spoken, one row
# for supplying a recording, and two read-only outputs for the evaluation.
with gr.Blocks() as demo:
    # Reference pronunciation controls.
    with gr.Row():
        word_input = gr.Textbox(label="Enter the word for pronunciation")
        tts_button = gr.Button("Listen to the word")
        tts_audio = gr.Audio(label="Original Pronunciation", type="filepath")

    # The user's own attempt.
    with gr.Row():
        mic_input = gr.Audio(label="Your Pronunciation", type="filepath")
        result_button = gr.Button("Evaluate Pronunciation")

    # Evaluation results.
    recognized_text = gr.Textbox(label="Recognized Text")
    similarity_score = gr.Number(label="Similarity (%)")

    # Event wiring: each button calls one handler and fills its outputs.
    tts_button.click(
        fn=evaluate_pronunciation,
        inputs=word_input,
        outputs=tts_audio,
    )
    result_button.click(
        fn=process_all,
        inputs=[word_input, mic_input],
        outputs=[recognized_text, similarity_score],
    )

# Start the web server for the interface.
demo.launch()