"""Gradio front end for the speaker pronunciation assessment.

The app takes a training text and a recorded/uploaded audio clip, runs the
pronunciation assessment on the audio and renders the resulting scores as
HTML progress bars.
"""

import contextlib
import html
import os
import tempfile

import gradio as gr
# from logic import Speaker_speech_analysis
from scipy.io import wavfile

from wav2vec_aligen import speaker_pronunciation_assesment

# Colours shared by the word highlighting, the legend and the progress bars.
_COLOR_GOOD = "#28a745"
_COLOR_UNDERSTANDABLE = "#ffc107"
_COLOR_BAD = "#dc3545"
_COLOR_NO_SPEECH = "#6c757d"

# Legend entries shown above the score bars, in display order.
_LEGEND = (
    ("Good", _COLOR_GOOD),
    ("Understandable", _COLOR_UNDERSTANDABLE),
    ("Bad", _COLOR_BAD),
    ("No Speech", _COLOR_NO_SPEECH),
)


def create_html_from_scores(word_levels):
    """Render ``(word, level)`` pairs as colour-coded HTML.

    Args:
        word_levels: Iterable of ``(word, level)`` pairs. A level of ``'/'``
            is rendered as the marker itself instead of the word (presumably
            the "No Speech" case of the legend -- confirm with the producer
            of these levels). ``'Wrong'`` and ``'Understandable'`` get their
            own colours; any other level is rendered as good.

    Returns:
        A string of ``<span>`` elements, each followed by a single space.
    """
    pieces = []
    for word, level in word_levels:
        if level == '/':
            color, shown = _COLOR_NO_SPEECH, level
        elif level == 'Wrong':
            color, shown = _COLOR_BAD, word
        elif level == 'Understandable':
            color, shown = _COLOR_UNDERSTANDABLE, word
        else:
            color, shown = _COLOR_GOOD, word
        # Words end up inside HTML markup, so they must be escaped.
        pieces.append(
            f'<span style="color: {color};">{html.escape(str(shown))}</span> '
        )
    return ''.join(pieces)


def generate_progress_bar(score, label):
    """Build the HTML for one labelled score bar on a 0-100 scale.

    Args:
        score: Numeric score; rounded to two decimals for display.
        label: Caption shown in front of the score.

    Returns:
        An HTML fragment with the caption, the score text, a coloured bar
        whose width reflects the score, and the "Max: 100" note.
    """
    score = round(score, 2)
    score_text = f"{score:.2f}" if score < 100 else "100"

    if score < 30:
        bar_color = "#dc3545"
    elif score < 60:
        bar_color = "#dc6545"
    elif score < 80:
        bar_color = "#ffc107"
    else:
        bar_color = "#28a745"

    # Clamp so an out-of-range score cannot produce a negative or
    # overflowing bar width.
    bar_length = f"{min(max(score, 0), 100)}%"

    return f"""
<div style="margin: 8px 0;">
  <div><strong>{html.escape(str(label))}:</strong> {score_text}</div>
  <div style="background-color: #e9ecef; border-radius: 4px; height: 16px; overflow: hidden;">
    <div style="width: {bar_length}; height: 100%; background-color: {bar_color};"></div>
  </div>
  <div style="font-size: 0.8em;">Max: 100</div>
</div>
"""


def analyze_audio(text, audio):
    """Assess the pronunciation of a recording and return the scores as HTML.

    Args:
        text: The training text entered by the user.
        audio: The value of the ``gr.Audio`` input; its first two items are
            passed to ``scipy.io.wavfile.write`` as rate and data (Gradio's
            default numpy format is ``(sample_rate, samples)``).

    Returns:
        An HTML string with a legend and one progress bar per score, or a
        plain message when either input is missing.
    """
    if text is None or audio is None:
        return 'the audio or the text is missing'

    # NOTE(review): ``text`` is not forwarded to the assessor, which only
    # receives the audio file -- confirm whether that is intended.

    # Write the audio to a unique temporary WAV file: a fixed file name would
    # be overwritten by concurrent requests and was never cleaned up.
    fd, temp_filename = tempfile.mkstemp(suffix='.wav')
    os.close(fd)
    try:
        wavfile.write(temp_filename, audio[0], audio[1])
        result = speaker_pronunciation_assesment(temp_filename)
    finally:
        with contextlib.suppress(OSError):
            os.remove(temp_filename)

    accuracy_score = result['pronunciation_accuracy']
    fluency_score = result['fluency_score']
    total_score = result['total_score']
    content_scores = result['content_scores']

    pronunciation_progress_bar = generate_progress_bar(accuracy_score, "Pronunciation Accuracy")
    fluency_progress_bar = generate_progress_bar(fluency_score, "Fluency Score")
    content_progress_bar = generate_progress_bar(content_scores, "Content Score")
    total_progress_bar = generate_progress_bar(total_score, "Total Score")

    legend = ' '.join(
        f'<span style="color: {color}; margin-right: 12px;">&#9632; {name}</span>'
        for name, color in _LEGEND
    )

    html_with_css = f"""
<div style="margin-bottom: 12px;">{legend}</div>
{total_progress_bar}
{pronunciation_progress_bar}
{fluency_progress_bar}
{content_progress_bar}
"""
    return html_with_css


# Define the Gradio interface
iface = gr.Interface(
    fn=analyze_audio,
    inputs=[
        gr.Textbox(
            label='Training Text',
            placeholder='Write the text for pronunciation task',
            interactive=True,
            visible=True,
            show_copy_button=True,
        ),
        gr.Audio(label="Recorded Audio", sources=['microphone', 'upload']),
    ],
    outputs=[
        gr.HTML(label="Analysis of pronunciation"),
    ],
    # css=additional_css,
    # title="Audio Analysis Tool",
    description="Write any text and record audio to predict pronunciation errors",
)

# Run the Gradio app
if __name__ == "__main__":
    iface.launch(share=True)