File size: 4,315 Bytes
d804881
 
9ab32d7
5bae85d
d804881
 
9ab32d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d804881
9ab32d7
d804881
9ab32d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5bae85d
 
 
 
 
 
 
 
9ab32d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d804881
 
 
9ab32d7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import gradio as gr

from aip_trainer import app_logger
from aip_trainer.lambdas import lambdaSpeechToScore, lambdaTTS


# JavaScript helper passed as the `js` argument of an event listener further down in this file.
# It receives the reference text and a per-letter correctness mask (space-separated groups,
# one per word, where the character "1" marks a correct letter) and repaints the element with
# id "speech-output": one <span> per letter, green when correct and red otherwise, followed by
# a <span> holding a single space after every word.
# The snippet must remain a single function expression, since that is the form Gradio expects
# for `js` callbacks.
js = """
function updateCssText(text, letters) {
    const speechOutputContainer = document.querySelector('#speech-output');
    if (!speechOutputContainer) {
        // Target element is missing: report it instead of failing on a null dereference.
        console.warn("updateCssText: element #speech-output not found");
        return;
    }
    // Tolerate null or undefined arguments by falling back to empty strings.
    const wordsArr = (text || "").split(" ");
    const lettersWordsArr = (letters || "").split(" ");
    speechOutputContainer.textContent = "";

    wordsArr.forEach((word, wordIdx) => {
        // Fewer masks than words: treat the word as entirely wrong rather than throwing.
        const letterIsCorrect = lettersWordsArr[wordIdx] || "";
        for (let letterIdx = 0; letterIdx < word.length; letterIdx++) {
            const containerLetter = document.createElement("span");
            containerLetter.style.color = letterIsCorrect[letterIdx] === "1" ? "green" : "red";
            containerLetter.innerText = word[letterIdx];
            speechOutputContainer.appendChild(containerLetter);
        }
        const containerSpace = document.createElement("span");
        containerSpace.textContent = " ";
        speechOutputContainer.appendChild(containerSpace);
    });
}
"""

# Build the Gradio UI. `gradio_app` is the Blocks instance launched by the `__main__` guard.
with gr.Blocks() as gradio_app:
    app_logger.info("start gradio app building...")

    gr.Markdown(
        """
        # AI Pronunciation Trainer

        See [my fork](https://github.com/trincadev/ai-pronunciation-trainer) of [AI Pronunciation Trainer](https://github.com/Thiagohgl/ai-pronunciation-trainer) repository
        for more details.
        """
    )
    with gr.Row():
        # Left column: learner inputs (language, text, recording) and the TTS player.
        with gr.Column(scale=4, min_width=300):
            with gr.Row():
                with gr.Column(scale=1, min_width=50):
                    language = gr.Radio(["de", "en"], label="Language", value="en")
                with gr.Column(scale=7, min_width=300):
                    learner_transcription = gr.Textbox(
                        lines=3,
                        label="Learner Transcription",
                        value="Hi there, how are you?",
                    )
            with gr.Row():
                learner_recording = gr.Audio(
                    label="Learner Recording",
                    sources=["microphone", "upload"],
                    type="filepath",
                )
            with gr.Row():
                tts = gr.Audio(label="tts")
                # Dedicated name so this button is not shadowed by the recognition
                # button that is bound to `btn` further down.
                tts_btn = gr.Button(value="TTS")
                tts_btn.click(
                    fn=lambdaTTS.get_tts,
                    inputs=[learner_transcription, language],
                    outputs=tts,
                )
        # Right column: scoring results.
        with gr.Column(scale=3, min_width=300):
            # Hidden textboxes: filled by the recognition callback and then read
            # as inputs by the JS listener registered on `html_output` below.
            transcripted_text = gr.Textbox(
                lines=2, placeholder=None, label="Transcripted text", visible=False
            )
            letter_correctness = gr.Textbox(
                lines=1,
                placeholder=None,
                label="Letters correctness",
                visible=False,
            )
            pronunciation_accuracy = gr.Textbox(
                lines=1, placeholder=None, label="Pronunciation accuracy %"
            )
            recording_ipa = gr.Textbox(
                lines=1, placeholder=None, label="Learner phonetic transcription"
            )
            ideal_ipa = gr.Textbox(
                lines=1, placeholder=None, label="Ideal phonetic transcription"
            )
            # Hidden; only written by the recognition callback in this file.
            res = gr.Textbox(lines=1, placeholder=None, label="RES", visible=False)
            # Its elem_id is the DOM id the JS helper looks up ('#speech-output').
            html_output = gr.HTML(
                label="Speech accuracy output",
                elem_id="speech-output",
                show_label=True,
                visible=True,
                render=True,
                value=" - ",
                elem_classes="speech-output",
            )
            btn = gr.Button(value="Recognize speech accuracy")

    # Original author's note on the callback's return tuple (presumably in this order;
    # verify against lambdaSpeechToScore.get_speech_to_score_tuple):
    # real_transcripts, is_letter_correct_all_words, pronunciation_accuracy, result['recording_ipa'], real_transcripts_ipa, res
    btn.click(
        lambdaSpeechToScore.get_speech_to_score_tuple,
        inputs=[learner_transcription, learner_recording, language],
        outputs=[
            transcripted_text,
            letter_correctness,
            pronunciation_accuracy,
            recording_ipa,
            ideal_ipa,
            res,
        ],
    )
    # Frontend-only listener (no Python function): runs the `js` helper with the two
    # hidden textboxes as arguments.
    # NOTE(review): this is attached to html_output's own change event, and no Python
    # callback visible in this file writes to html_output - confirm that it actually
    # fires after a recognition run (letter_correctness.change may be what is intended).
    html_output.change(
        None,
        inputs=[transcripted_text, letter_correctness],
        outputs=[html_output],
        js=js,
    )


# Launch the app with Gradio's default settings only when this file is run as a script;
# importing the module just builds `gradio_app` without launching it.
if __name__ == "__main__":
    gradio_app.launch()