CAPT-ReadAloud / app.py
seba3y's picture
Update app.py
91d2a90
raw
history blame
No virus
2.42 kB
import html
import os
import tempfile

import gradio as gr
from scipy.io import wavfile

from logic import compare_audio_with_text
def create_html_from_scores(word_scores):
    """Render scored words as a string of color-coded HTML ``<span>`` elements.

    Args:
        word_scores: Iterable of ``(word, score)`` pairs.

    Returns:
        One ``<span>`` per pair, each followed by a single space. A score of
        ``1`` is colored red, ``2`` orange, and any other value green. An
        empty input yields an empty string.
    """
    spans = []
    for word, score in word_scores:
        if score == 1:
            color = 'red'
        elif score == 2:
            color = 'orange'
        else:
            color = 'green'
        # The word originates from user-entered text, so escape it before
        # embedding it in markup; otherwise characters such as '<' or '&'
        # would corrupt the rendered output or inject HTML.
        safe_word = html.escape(str(word))
        spans.append(f'<span style="color: {color};">{safe_word}</span> ')
    return ''.join(spans)
def analyze_audio(text, audio):
    """Score a recording against the reference text and return an HTML report.

    Args:
        text: The reference text, forwarded to ``compare_audio_with_text``.
        audio: A two-item sequence whose first item is passed to
            ``wavfile.write`` as the sample rate and whose second item is
            passed as the sample data, or ``None`` when no audio was given.

    Returns:
        An HTML string containing a color legend followed by the scored
        words, or a short HTML message when ``audio`` is ``None``.
    """
    if audio is None:
        # Nothing was recorded or uploaded; indexing into None would raise.
        return '<p>No audio was provided. Please record or upload audio and try again.</p>'

    sample_rate, samples = audio[0], audio[1]

    # Use a per-call temporary directory so concurrent requests cannot
    # overwrite each other's recordings, and so the file is removed afterwards.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filename = os.path.join(temp_dir, 'temp_audio.wav')
        wavfile.write(temp_filename, sample_rate, samples)
        result = compare_audio_with_text(temp_filename, text)
        # Consume the result while the file still exists, in case the
        # comparison reads the audio lazily.
        html_content = create_html_from_scores(result)

    html_with_css = f"""
<style>
.legend {{
font-size: 22px;
display: flex;
align-items: center;
gap: 12px;
}}
.legend-dot {{
height: 15px;
width: 15px;
border-radius: 50%;
display: inline-block;
}}
.good {{ color: #28a745; }}
.average {{ color: #ffc107; }}
.bad {{ color: #dc3545; }}
.text {{ font-size: 20px; }}
</style>
<div class="legend">
<span class="legend-dot" style="background-color: #28a745;"></span><span>Good</span>
<span class="legend-dot" style="background-color: #ffc107;"></span><span>Understandable</span>
<span class="legend-dot" style="background-color: #dc3545;"></span><span>Bad</span>
</div>
<p class="text">
{html_content}
</p>
"""
    return html_with_css
# Define the Gradio interface: the reference text and a recording go in,
# and the color-coded HTML produced by analyze_audio comes out.
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        gr.Textbox(
            label='Training Text',
            placeholder='Write the text for pronunciation task',
            interactive=True,
            visible=True,
            show_copy_button=True,
        ),
        gr.Audio(label="Recorded Audio"),
    ],
    outputs=[
        gr.HTML(label="Analysis of pronunciation"),
    ],
    description="Write any text and record audio to predict pronunciation errors",
)

# Run the Gradio app only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()