Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,9 @@ from pydub import AudioSegment
|
|
8 |
|
9 |
# Step 1: Transcribe the audio file
|
10 |
def transcribe_audio(audio):
|
|
|
|
|
|
|
11 |
recognizer = sr.Recognizer()
|
12 |
audio_format = audio.split('.')[-1].lower()
|
13 |
|
@@ -43,14 +46,17 @@ def create_pronunciation_audio(word):
|
|
43 |
audio_buffer = io.BytesIO()
|
44 |
tts.save(audio_buffer)
|
45 |
audio_buffer.seek(0)
|
46 |
-
|
|
|
|
|
|
|
47 |
|
48 |
# Step 3: Compare the transcribed text with the input paragraph
|
49 |
def compare_texts(reference_text, transcribed_text):
|
50 |
word_scores = []
|
51 |
reference_words = reference_text.split()
|
52 |
transcribed_words = transcribed_text.split()
|
53 |
-
incorrect_words_audios = [] # Store audio
|
54 |
|
55 |
sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
|
56 |
similarity_score = round(sm.ratio() * 100, 2)
|
@@ -72,10 +78,8 @@ def compare_texts(reference_text, transcribed_text):
|
|
72 |
# Incorrect words in red
|
73 |
html_output += f'<span style="color: red;">{word}</span> '
|
74 |
# Create pronunciation audio for the incorrect word
|
75 |
-
|
76 |
-
|
77 |
-
audio_base64 = audio_buffer.getvalue().hex()
|
78 |
-
incorrect_words_audios.append((word, audio_base64))
|
79 |
except IndexError:
|
80 |
html_output += f'<span style="color: red;">{word}</span> ' # Words in reference that were not transcribed
|
81 |
|
@@ -84,18 +88,23 @@ def compare_texts(reference_text, transcribed_text):
|
|
84 |
html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
|
85 |
for word, audio in incorrect_words_audios:
|
86 |
html_output += f'{word}: '
|
87 |
-
|
88 |
-
html_output += f'<audio controls><source src="data:audio/mp3;base64,{audio}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
|
89 |
|
90 |
return html_output
|
91 |
|
92 |
# Step 4: Text-to-Speech Function
|
93 |
def text_to_speech(paragraph):
|
|
|
|
|
|
|
94 |
tts = gTTS(paragraph)
|
95 |
audio_buffer = io.BytesIO()
|
96 |
tts.save(audio_buffer)
|
97 |
audio_buffer.seek(0)
|
98 |
-
|
|
|
|
|
|
|
99 |
|
100 |
# Gradio Interface Function
|
101 |
def gradio_function(paragraph, audio):
|
|
|
8 |
|
9 |
# Step 1: Transcribe the audio file
|
10 |
def transcribe_audio(audio):
|
11 |
+
if audio is None:
|
12 |
+
return "No audio file provided." # Handle the case when no audio is uploaded
|
13 |
+
|
14 |
recognizer = sr.Recognizer()
|
15 |
audio_format = audio.split('.')[-1].lower()
|
16 |
|
|
|
46 |
audio_buffer = io.BytesIO()
|
47 |
tts.save(audio_buffer)
|
48 |
audio_buffer.seek(0)
|
49 |
+
audio_file_path = f"audio/{word}.mp3" # Save the audio to a file
|
50 |
+
with open(audio_file_path, 'wb') as f:
|
51 |
+
f.write(audio_buffer.read())
|
52 |
+
return audio_file_path # Return the file path instead of BytesIO
|
53 |
|
54 |
# Step 3: Compare the transcribed text with the input paragraph
|
55 |
def compare_texts(reference_text, transcribed_text):
|
56 |
word_scores = []
|
57 |
reference_words = reference_text.split()
|
58 |
transcribed_words = transcribed_text.split()
|
59 |
+
incorrect_words_audios = [] # Store audio paths for incorrect words
|
60 |
|
61 |
sm = difflib.SequenceMatcher(None, reference_text, transcribed_text)
|
62 |
similarity_score = round(sm.ratio() * 100, 2)
|
|
|
78 |
# Incorrect words in red
|
79 |
html_output += f'<span style="color: red;">{word}</span> '
|
80 |
# Create pronunciation audio for the incorrect word
|
81 |
+
audio_file_path = create_pronunciation_audio(word)
|
82 |
+
incorrect_words_audios.append((word, audio_file_path))
|
|
|
|
|
83 |
except IndexError:
|
84 |
html_output += f'<span style="color: red;">{word}</span> ' # Words in reference that were not transcribed
|
85 |
|
|
|
88 |
html_output += "<br><strong>Pronunciation for Incorrect Words:</strong><br>"
|
89 |
for word, audio in incorrect_words_audios:
|
90 |
html_output += f'{word}: '
|
91 |
+
html_output += f'<audio controls><source src="{audio}" type="audio/mpeg">Your browser does not support the audio tag.</audio><br>'
|
|
|
92 |
|
93 |
return html_output
|
94 |
|
95 |
# Step 4: Text-to-Speech Function
|
96 |
def text_to_speech(paragraph):
    """Synthesize *paragraph* to an MP3 file and return the file's path.

    Args:
        paragraph: Text to speak. ``None``, an empty string, or
            whitespace-only text is treated as "nothing to say".

    Returns:
        The path ``"audio/text_to_speech.mp3"`` of the written MP3, or
        ``None`` when there is no text to synthesize.
    """
    # Local import: the file's top-level import block is not visible here.
    import os

    if not paragraph or not paragraph.strip():
        return None  # Handle the case when no text is provided

    audio_file_path = "audio/text_to_speech.mp3"
    # open()/save() do not create parent directories, so make sure the
    # target directory exists before writing into it.
    os.makedirs(os.path.dirname(audio_file_path), exist_ok=True)

    # gTTS.save() expects a file *path*. Passing it a BytesIO (as the
    # previous version did) leaves the buffer empty, so the MP3 copied out
    # of the buffer afterwards had no audio data. Save straight to the
    # destination path instead.
    tts = gTTS(paragraph)
    tts.save(audio_file_path)
    return audio_file_path  # Return the file path instead of BytesIO
|
108 |
|
109 |
# Gradio Interface Function
|
110 |
def gradio_function(paragraph, audio):
|