Spaces:
Running
Running
File size: 7,285 Bytes
ca7e6be 0931910 d804881 6777887 0931910 9ab32d7 d51ffe7 1470bc9 d51ffe7 9ab32d7 0931910 6777887 9ab32d7 d51ffe7 9ab32d7 d51ffe7 9ab32d7 d51ffe7 1470bc9 d51ffe7 1470bc9 9ab32d7 1470bc9 9ab32d7 1470bc9 9ab32d7 d51ffe7 9ab32d7 6777887 9ab32d7 d51ffe7 6777887 ca7e6be 9ab32d7 ca7e6be 9ab32d7 ca7e6be 9ab32d7 1470bc9 d51ffe7 1470bc9 d51ffe7 9ab32d7 0931910 9ab32d7 d804881 6e31dbd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
import json
from pathlib import Path
import gradio as gr
from aip_trainer import PROJECT_ROOT_FOLDER, app_logger, sample_rate_start
from aip_trainer.lambdas import js, lambdaGetSample, lambdaSpeechToScore, lambdaTTS
def clear() -> None:
    """Return ``None``.

    Wired as a Gradio click callback with a single output component, so the
    returned ``None`` becomes that component's new value.
    """
    return None
def clear2() -> tuple[None, None]:
    """Return a pair of ``None`` values.

    Wired as a Gradio click callback with two output components; each
    component receives one ``None`` as its new value.
    """
    return None, None
with gr.Blocks() as gradio_app:
    # NOTE(review): the nesting of the layout containers below was reconstructed
    # from a listing that had lost its indentation -- confirm against the
    # rendered UI before relying on the exact Row/Column hierarchy.
    app_logger.info("start gradio app building...")

    # Load the markdown description and fill in its ``sample_rate_start`` placeholder.
    project_root_folder = Path(PROJECT_ROOT_FOLDER)
    with open(project_root_folder / "aip_trainer" / "lambdas" / "app_description.md", "r", encoding="utf-8") as app_description_src:
        app_description = app_description_src.read()
    gr.Markdown(app_description.format(sample_rate_start=sample_rate_start))

    with gr.Row():
        # Left column: phrase selection, text to read, TTS playback and learner recording.
        with gr.Column(scale=4, min_width=300):
            with gr.Row():
                with gr.Column(scale=2, min_width=80):
                    language = gr.Radio(["de", "en"], label="Language", value="en")
                with gr.Column(scale=5, min_width=160):
                    # Choices are (label, value) pairs; the integer value is what
                    # the callbacks receive.
                    difficulty = gr.Radio(
                        label="Difficulty",
                        value=0,
                        choices=[
                            ("random", 0),
                            ("easy", 1),
                            ("medium", 2),
                            ("hard", 3),
                        ],
                    )
                with gr.Column(scale=1, min_width=100):
                    btn_random_phrase = gr.Button(value="Choose a random phrase")
            with gr.Row():
                with gr.Column(scale=7, min_width=300):
                    learner_transcription = gr.Textbox(
                        lines=3,
                        label="Learner Transcription",
                        value="Hi there, how are you?",
                    )
            with gr.Row():
                with gr.Column(scale=7, min_width=240):
                    audio_tts = gr.Audio(label="Audio TTS")
                with gr.Column(scale=1, min_width=50):
                    btn_run_tts = gr.Button(value="Run TTS")
                    btn_clear_tts = gr.Button(value="Clear TTS")
                    btn_clear_tts.click(clear, inputs=[], outputs=[audio_tts])
            with gr.Row():
                audio_learner_recording_stt = gr.Audio(
                    label="Learner Recording",
                    sources=["microphone", "upload"],
                    type="filepath",
                    show_download_button=True,
                )
        # Right column: scoring results.
        with gr.Column(scale=4, min_width=320):
            # Hidden textboxes: written by the scoring callback and read back as
            # inputs by the ``html_output`` listener at the bottom of this block.
            transcripted_text = gr.Textbox(
                lines=2, placeholder=None, label="Transcripted text", visible=False
            )
            letter_correctness = gr.Textbox(
                lines=1,
                placeholder=None,
                label="Letters correctness",
                visible=False,
            )
            with gr.Row():
                with gr.Column(scale=3, min_width=100):
                    pronunciation_accuracy = gr.Number(label="Current pronunciation accuracy %")
                with gr.Column(scale=2, min_width=100):
                    number_score_de = gr.Number(label="Score DE", value=0)
                with gr.Column(scale=2, min_width=100):
                    number_score_en = gr.Number(label="Score EN", value=0)
            recording_ipa = gr.Textbox(
                lines=1, placeholder=None, label="Learner phonetic transcription"
            )
            ideal_ipa = gr.Textbox(
                lines=1, placeholder=None, label="Ideal phonetic transcription"
            )
            res = gr.Textbox(lines=1, placeholder=None, label="RES", visible=False)
            html_output = gr.HTML(
                label="Speech accuracy output",
                elem_id="speech-output",
                show_label=True,
                visible=True,
                render=True,
                value=" - ",
                elem_classes="speech-output",
            )
    with gr.Row():
        btn = gr.Button(value="Recognize speech accuracy")
    with gr.Accordion("Click here to expand the table examples", open=False):
        # Each example row fills (learner_transcription, language, difficulty).
        examples_text = gr.Examples(
            examples=[
                ["Hallo, wie geht es dir?", "de", 1],
                ["Hi there, how are you?", "en", 1],
                ["Die König-Ludwig-Eiche ist ein Naturdenkmal im Staatsbad Brückenau.", "de", 2,],
                ["Rome is home to some of the most beautiful monuments in the world.", "en", 2],
                ["Die König-Ludwig-Eiche ist ein Naturdenkmal im Staatsbad Brückenau, einem Ortsteil des drei Kilometer nordöstlich gelegenen Bad Brückenau im Landkreis Bad Kissingen in Bayern.", "de", 3],
                ["Some machine learning models are designed to understand and generate human-like text based on the input they receive.", "en", 3],
            ],
            inputs=[learner_transcription, language, difficulty],
        )

    def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float) -> dict:
        """Score a recording and add the result to the selected language's running score.

        Calls ``lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang)``
        and maps its six results onto the output components.

        Args:
            text: Content of the "Learner Transcription" textbox.
            audio_rec: Value of the learner recording component (created with
                ``type="filepath"``).
            lang: Selected language; only ``"de"`` and ``"en"`` are handled.
            score_de: Current value of the "Score DE" component.
            score_en: Current value of the "Score EN" component.

        Returns:
            A dict keyed by Gradio components holding their new values. The
            score of ``lang`` is increased by the pronunciation accuracy, the
            other score is passed through as ``float``.

        Raises:
            NotImplementedError: If ``lang`` is neither ``"de"`` nor ``"en"``.
                The check runs only after the scoring call has returned.
        """
        _transcripted_text, _letter_correctness, _pronunciation_accuracy, _recording_ipa, _ideal_ipa, _res = lambdaSpeechToScore.get_speech_to_score_tuple(text, audio_rec, lang)
        # Component updates that are identical for every supported language.
        output = {
            transcripted_text: _transcripted_text,
            letter_correctness: _letter_correctness,
            pronunciation_accuracy: _pronunciation_accuracy,
            recording_ipa: _recording_ipa,
            ideal_ipa: _ideal_ipa,
            res: _res,
        }
        match lang:
            case "de":
                return {
                    number_score_de: float(score_de) + float(_pronunciation_accuracy),
                    number_score_en: float(score_en),
                    **output
                }
            case "en":
                return {
                    number_score_en: float(score_en) + float(_pronunciation_accuracy),
                    number_score_de: float(score_de),
                    **output
                }
            case _:
                raise NotImplementedError(f"Language {lang} not supported")

    # Score the recording; the callback returns a component-keyed dict, so every
    # component it may update is listed in ``outputs``.
    btn.click(
        get_updated_score_by_language,
        inputs=[learner_transcription, audio_learner_recording_stt, language, number_score_de, number_score_en],
        outputs=[
            transcripted_text,
            letter_correctness,
            pronunciation_accuracy,
            recording_ipa,
            ideal_ipa,
            res,
            number_score_de, number_score_en
        ],
    )
    btn_run_tts.click(
        fn=lambdaTTS.get_tts,
        inputs=[learner_transcription, language],
        outputs=audio_tts,
    )
    # Two independent listeners on the same button: one fetches a new phrase,
    # the other resets both audio components.
    btn_random_phrase.click(
        lambdaGetSample.get_random_selection,
        inputs=[language, difficulty],
        outputs=[learner_transcription],
    )
    btn_random_phrase.click(
        clear2,
        inputs=[],
        outputs=[audio_learner_recording_stt, audio_tts]
    )
    # No Python callback here (fn is None): only the ``js`` snippet is attached.
    # NOTE(review): within this block nothing writes to ``html_output`` except
    # this listener itself -- confirm what is expected to trigger its ``change``
    # event (the hidden textboxes it reads are updated by ``btn.click``).
    html_output.change(
        None,
        inputs=[transcripted_text, letter_correctness],
        outputs=[html_output],
        js=js.js_update_ipa_output,
    )
# Start the Gradio server only when this module is executed as a script.
if __name__ == "__main__":
    gradio_app.launch()
|