CAPT-ReadAloud / app.py
seba3y's picture
Upload 4 files
be1b9b7 verified
raw history blame
No virus
4.91 kB
import html
import os
import tempfile

import gradio as gr
# from logic import Speaker_speech_analysis
from scipy.io import wavfile

from wav2vec_aligen import speaker_pronunciation_assesment
def create_html_from_scores(word_levels):
    """Render scored words as a run of colour-coded ``<span>`` elements.

    Args:
        word_levels: iterable of ``(word, level)`` pairs. ``level`` selects
            the colour: ``'/'`` -> blue (the ``'/'`` marker itself is shown
            instead of the word), ``'Wrong'`` -> red,
            ``'Understandable'`` -> amber, anything else -> green.

    Returns:
        A string with one ``<span>`` per pair, each followed by a single
        space; the empty string when ``word_levels`` is empty.
    """
    level_colors = {
        'Wrong': '#dc3545',
        'Understandable': '#ffc107',
    }
    default_color = '#28a745'

    spans = []
    for word, level in word_levels:
        if level == '/':
            color, shown = '#0000ff', level
        else:
            color = level_colors.get(level, default_color)
            # Escape markup characters so a word containing '<', '>' or '&'
            # cannot break (or inject into) the generated HTML.
            shown = html.escape(str(word), quote=False)
        spans.append(f'<span style="color: {color};">{shown}</span> ')
    return ''.join(spans)
def generate_progress_bar(score, label):
    """Build the HTML for one labelled progress bar on a 0-100 scale.

    Args:
        score: numeric score. It is rounded to two decimals and clamped to
            the range [0, 100] so the bar width is always a valid percentage.
        label: caption shown above the bar (inserted as-is, followed by ':').

    Returns:
        An HTML fragment containing the label, the coloured bar with the
        score printed inside it, and a "Max: 100" footer. The markup relies
        on the ``progress-*`` CSS classes being defined by the caller.
    """
    # Clamp after rounding: a negative or >100 score would otherwise yield an
    # invalid/overflowing CSS width.
    score = min(max(round(score, 2), 0), 100)
    score_text = "100" if score >= 100 else f"{score:.2f}"

    # Colour bands: red < 30 <= orange-red < 60 <= amber < 80 <= green.
    if score < 30:
        bar_color = "#dc3545"
    elif score < 60:
        bar_color = "#dc6545"
    elif score < 80:
        bar_color = "#ffc107"
    else:
        bar_color = "#28a745"

    # The score is already a percentage; format it directly instead of
    # dividing and re-multiplying by 100, which introduced float noise
    # (e.g. 29 -> "28.999999999999996%").
    bar_length = f"{score:.2f}%"

    return f"""
<div class="progress-label">{label}:</div>
<div class="progress-container">
<div class="progress-bar" style="width: {bar_length}; background-color: {bar_color};">
<div class="progress-score">{score_text}</div>
</div>
</div>
<div class="progress-max">Max: 100</div>
"""
# CSS to be used in the Gradio Interface
# Static part of the report: the stylesheet used by the progress bars and the
# colour legend. It has no per-request content, so it is defined only once.
_REPORT_STYLE_AND_LEGEND = """
<style>
    .legend {
        font-size: 22px;
        display: flex;
        align-items: center;
        gap: 12px;
    }
    .legend-dot {
        height: 15px;
        width: 15px;
        border-radius: 50%;
        display: inline-block;
    }
    .good { color: #28a745;
    }
    .average { color: #ffc107;
    }
    .bad { color: #dc3545;
    }
    .wrong { color: #dc3545;
    }
    .text {
        font-size: 20px;
        margin-bottom: 20px;
    }
    .progress-container {
        width: 100%;
        background-color: #ddd;
        border-radius: 13px;
        overflow: hidden;
    }
    .progress-bar {
        height: 30px;
        line-height: 30px;
        text-align: center;
        font-size: 16px;
        border-radius: 15px;
        transition: width 1s ease;
    }
    .progress-label {
        font-weight: bold;
        font-size: 22px;
        margin-bottom: 20px;
        margin-top: 5px;
        text-align: center;
    }
    .progress-score {
        display: inline-block;
        color: black;
    }
    .progress-max {
        text-align: right;
        margin: 10px;
        font-size: 16px;
    }
</style>
<div class="legend">
    <span class="legend-dot" style="background-color: #28a745;"></span><span>Good</span>
    <span class="legend-dot" style="background-color: #ffc107;"></span><span>Understandable</span>
    <span class="legend-dot" style="background-color: #dc3545;"></span><span>Bad</span>
    <span class="legend-dot" style="background-color: #0000ff;"></span><span>No Speech</span>
</div>
"""


def analyze_audio(text, audio):
    """Score a recording and return the result as an HTML report.

    Args:
        text: the training text from the UI. Only checked for ``None``.
        audio: indexable pair whose first item is written as the WAV sample
            rate and whose second item is written as the sample data
            (presumably Gradio's default ``(sample_rate, ndarray)`` audio
            value -- confirm against the ``gr.Audio`` configuration).

    Returns:
        The plain message ``'the audio or the text is missing'`` when either
        argument is ``None``; otherwise an HTML string with the stylesheet,
        the colour legend and four progress bars (total, pronunciation
        accuracy, fluency, content).
    """
    if text is None or audio is None:
        return 'the audio or the text is missing'

    # NOTE(review): `text` is never handed to the scorer below, so the report
    # does not depend on it -- confirm whether speaker_pronunciation_assesment
    # should receive the reference text.

    sample_rate, samples = audio[0], audio[1]

    # Write into a per-call temporary directory rather than a fixed
    # 'temp_audio.wav' in the working directory: concurrent requests no
    # longer overwrite each other's audio, and the file is removed afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        wav_path = os.path.join(workdir, 'temp_audio.wav')
        wavfile.write(wav_path, sample_rate, samples)
        result = speaker_pronunciation_assesment(wav_path)

    # (result key, bar caption) in the order the bars appear in the report.
    sections = (
        ('total_score', "Total Score"),
        ('pronunciation_accuracy', "Pronunciation Accuracy"),
        ('fluency_score', "Fluency Score"),
        ('content_scores', "Content Score"),
    )
    bars = [generate_progress_bar(result[key], caption) for key, caption in sections]

    return _REPORT_STYLE_AND_LEGEND + "\n".join(bars) + "\n"
# Define the Gradio interface
# One text box (the training text) and one audio input (microphone or file
# upload) feed analyze_audio; its HTML report is shown in a single HTML pane.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        gr.Textbox(
            label='Training Text',
            placeholder='Write the text for pronunciation task',
            interactive=True,
            visible=True,
            show_copy_button=True,
        ),
        gr.Audio(label="Recorded Audio", sources=['microphone', 'upload']),
    ],
    outputs=[
        gr.HTML(label="Analysis of pronunciation"),
    ],
    # css=additional_css,
    # title="Audio Analysis Tool",
    description="Write any text and record audio to predict pronunciation errors",
)
# Run the Gradio app
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script,
    # not when it is imported. share=True is forwarded to launch(); in Gradio
    # this requests a public share link (see the Gradio launch() docs).
    iface.launch(share=True)