alessandro trinca tornidor committed on
Commit
38f204d
1 Parent(s): 183d840

feat: update driver.js guided tour

Browse files
Files changed (2) hide show
  1. aip_trainer/lambdas/js.py +19 -11
  2. app.py +43 -35
aip_trainer/lambdas/js.py CHANGED
@@ -84,19 +84,27 @@ head_driver_tour = """
84
  <script src="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.js.iife.js" integrity="sha512-8EdV4D5VlQLX0dJFcdx6h/oJ/NanAIMlaViz57NDkhzwbQsxabgpFua0gzM4f5vdk60CfRAydhlbfbDThMfh3w==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
85
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.css" integrity="sha512-jRsM62XMRl33ewZ0Si7yX6ANq+ZiWwUcvPk4H2DKr417W80rPMXzbD/towhs2YEoux/dfOuVRkLB+5Tfzmfolg==" crossorigin="anonymous" referrerpolicy="no-referrer" />
86
  <script type="module">
87
- console.log("window driver:", window, "!#")
88
- const driver0 = window.driver;
89
- console.log("driver0:", driver0, "!#")
90
- const driverJs = driver0.js;
91
- console.log("driverJs:", driverJs, "!#")
92
- const driver = driverJs.driver;
93
-
94
- console.log("driver:", driver, "!#")
95
 
96
  const driverSteps = [
97
- { element: 'id-ai-pronunciation-trainer-gradio-app-container', popover: { title: 'AI Pronunciation Trainer Gradio app', description: 'A quick tour about my Pronunciation Trainer Gradio app functionality' } },
98
- { element: '#btn-run-tts-id-element', popover: { title: 'btn_run_tts', description: 'a button to run btn_run_tts' } },
99
- { element: '#btn-run-tts-backend-id-element', popover: { title: 'btn_run_tts_backend', description: 'a button to run btn_run_tts_backend' } }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  ]
101
  const driverObj = driver({
102
  showProgress: true,
 
84
  <script src="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.js.iife.js" integrity="sha512-8EdV4D5VlQLX0dJFcdx6h/oJ/NanAIMlaViz57NDkhzwbQsxabgpFua0gzM4f5vdk60CfRAydhlbfbDThMfh3w==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
85
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/driver.js/1.3.1/driver.css" integrity="sha512-jRsM62XMRl33ewZ0Si7yX6ANq+ZiWwUcvPk4H2DKr417W80rPMXzbD/towhs2YEoux/dfOuVRkLB+5Tfzmfolg==" crossorigin="anonymous" referrerpolicy="no-referrer" />
86
  <script type="module">
87
+ const driver = window.driver.js.driver;
 
 
 
 
 
 
 
88
 
89
  const driverSteps = [
90
+ { element: "id-ai-pronunciation-trainer-gradio-app-container", popover: { title: "AI Pronunciation Trainer Gradio app", description: "A quick tour of the features of the Gradio app 'AI Pronunciation Trainer'." } },
91
+ { element: "#radio-language-id-element", popover: { title: "Languages", description: "Choose the input language." } },
92
+ { element: "#radio-difficulty-id-element", popover: { title: "Difficulty", description: "Choose the difficulty for the random selection of the sentence to be used as input for speech recognition." } },
93
+ { element: "#btn-random-phrase-id-element", popover: { title: "Choose a random phrase", description: "Choose a random sentence to be used as input for speech recognition." } },
94
+ { element: "#text-student-transcription-id-element", popover: { title: "Phrase to read for speech recognition", description: "Enter the sentence to be used as input for speech recognition." } },
95
+ { element: "#audio-tts-id-element", popover: { title: "Audio TTS", description: "Text-to-speech audio output." } },
96
+ { element: "#btn-run-tts-id-element", popover: { title: "In-browser Text-to-Speech", description: "Execute the text-to-speech functionality in the browser by reading the student's transcription." } },
97
+ { element: "#btn-run-tts-backend-id-element", popover: { title: "Backend Text-to-Speech", description: "Execute the text-to-speech functionality in the backend by reading the student's transcription." } },
98
+ { element: "#btn-clear-tts-backend-id-element", popover: { title: "Clear Text-to-Speech", description: "Clear the synthetic audio output of the text-to-speech synthesis." } },
99
+ { element: "#audio-student-recording-stt-id-element", popover: { title: "Speech-to-Text audio output", description: "Recorded audio output of the speech recognition." } },
100
+ { element: "#text-student-recording-ipa-id-element", popover: { title: "Student phonetic transcription", description: "Phonetic transcription of the student's speech." } },
101
+ { element: "#text-ideal-ipa-id-element", popover: { title: "'Ideal' phonetic transcription", description: "'Ideal' phonetic transcription." } },
102
+ { element: ".speech-output-group", popover: { title: "Detailed speech accuracy output", description: "Detailed output of speech accuracy, word by word." } },
103
+ { element: "#number-pronunciation-accuracy-id-element", popover: { title: "Current accuracy", description: "Current speech accuracy." } },
104
+ { element: "#number-score-de-id-element", popover: { title: "Global German accuracy", description: "Total speech accuracy in German." } },
105
+ { element: "#number-score-en-id-element", popover: { title: "Global English accuracy", description: "Total speech accuracy in English." } },
106
+ { element: "#btn-recognize-speech-accuracy-id-element", popover: { title: "Run speech accuracy recognition", description: "Execute the speech accuracy recognition." } },
107
+ { element: "#accordion-examples-id-element", popover: { title: "Text examples", description: "Some text examples to be used as input for speech recognition." } },
108
  ]
109
  const driverObj = driver({
110
  showProgress: true,
app.py CHANGED
@@ -6,8 +6,11 @@ from aip_trainer.lambdas import js, lambdaGetSample, lambdaSpeechToScore, lambda
6
 
7
 
8
  css = """
9
- .speech-output-label p {color: grey;}
10
- .speech-output-container {align-items: center; min-height: 60px; padding-left: 8px; padding-right: 8px; margin-top: -12px; border-width: 1px; border-style: solid; border-color: lightgrey;}
 
 
 
11
  """
12
 
13
 
@@ -31,7 +34,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
31
  with gr.Column(scale=4, min_width=300):
32
  with gr.Row():
33
  with gr.Column(scale=2, min_width=80):
34
- radio_language = gr.Radio(["de", "en"], label="Language", value="en")
35
  with gr.Column(scale=5, min_width=160):
36
  radio_difficulty = gr.Radio(
37
  label="Difficulty",
@@ -42,29 +45,32 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
42
  ("medium", 2),
43
  ("hard", 3),
44
  ],
 
45
  )
46
  with gr.Column(scale=1, min_width=100):
47
- btn_random_phrase = gr.Button(value="Choose a random phrase")
48
  with gr.Row():
49
  with gr.Column(scale=7, min_width=300):
50
- text_learner_transcription = gr.Textbox(
51
  lines=3,
52
- label="Learner Transcription",
53
  value="Hi there, how are you?",
 
54
  )
55
  with gr.Row():
56
- audio_tts = gr.Audio(label="Audio TTS")
57
  with gr.Row():
58
  btn_run_tts = gr.Button(value="TTS in browser", elem_id="btn-run-tts-id-element")
59
  btn_run_tts_backend = gr.Button(value="TTS backend", elem_id="btn-run-tts-backend-id-element")
60
- btn_clear_tts = gr.Button(value="Clear TTS backend")
61
  btn_clear_tts.click(clear, inputs=[], outputs=[audio_tts])
62
  with gr.Row():
63
- audio_learner_recording_stt = gr.Audio(
64
- label="Learner Recording",
65
  sources=["microphone", "upload"],
66
  type="filepath",
67
  show_download_button=True,
 
68
  )
69
  with gr.Column(scale=4, min_width=320):
70
  text_transcribed_hidden = gr.Textbox(
@@ -76,35 +82,36 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
76
  visible=False,
77
  )
78
  text_recording_ipa = gr.Textbox(
79
- placeholder=None, label="Learner phonetic transcription"
80
  )
81
  text_ideal_ipa = gr.Textbox(
82
- placeholder=None, label="Ideal phonetic transcription"
83
  )
84
  text_raw_json_output_hidden = gr.Textbox(placeholder=None, label="text_raw_json_output_hidden", visible=False)
85
- gr.Markdown("Speech accuracy output", elem_classes="speech-output-label")
86
- with gr.Row(elem_classes="speech-output-container"):
87
- html_output = gr.HTML(
88
- label="Speech accuracy output",
89
- elem_id="speech-output",
90
- show_label=False,
91
- visible=True,
92
- render=True,
93
- value=" - ",
94
- elem_classes="speech-output",
95
- )
 
96
  with gr.Row():
97
- gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1")
98
  with gr.Row():
99
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
100
- number_pronunciation_accuracy = gr.Number(label="Current score")
101
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
102
- number_score_de = gr.Number(label="Global score DE", value=0, interactive=False)
103
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
104
- number_score_en = gr.Number(label="Global score EN", value=0, interactive=False)
105
  with gr.Row():
106
- btn = gr.Button(value="Recognize speech accuracy")
107
- with gr.Accordion("Click here to expand the table examples", open=False):
108
  examples_text = gr.Examples(
109
  examples=[
110
  ["Hallo, wie geht es dir?", "de", 1],
@@ -114,7 +121,8 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
114
  ["Die König-Ludwig-Eiche ist ein Naturdenkmal im Staatsbad Brückenau, einem Ortsteil des drei Kilometer nordöstlich gelegenen Bad Brückenau im Landkreis Bad Kissingen in Bayern.", "de", 3],
115
  ["Some machine learning models are designed to understand and generate human-like text based on the input they receive.", "en", 3],
116
  ],
117
- inputs=[text_learner_transcription, radio_language, radio_difficulty],
 
118
  )
119
 
120
  def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
@@ -145,7 +153,7 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
145
 
146
  btn.click(
147
  get_updated_score_by_language,
148
- inputs=[text_learner_transcription, audio_learner_recording_stt, radio_language, number_score_de, number_score_en],
149
  outputs=[
150
  text_transcribed_hidden,
151
  text_letter_correctness,
@@ -156,21 +164,21 @@ with gr.Blocks(css=css, head=js.head_driver_tour) as gradio_app:
156
  number_score_de, number_score_en
157
  ],
158
  )
159
- btn_run_tts.click(fn=None, inputs=[text_learner_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
160
  btn_run_tts_backend.click(
161
  fn=lambdaTTS.get_tts,
162
- inputs=[text_learner_transcription, radio_language],
163
  outputs=audio_tts,
164
  )
165
  btn_random_phrase.click(
166
  lambdaGetSample.get_random_selection,
167
  inputs=[radio_language, radio_difficulty],
168
- outputs=[text_learner_transcription],
169
  )
170
  btn_random_phrase.click(
171
  clear2,
172
  inputs=[],
173
- outputs=[audio_learner_recording_stt, audio_tts]
174
  )
175
  html_output.change(
176
  None,
 
6
 
7
 
8
  css = """
9
+ .speech-output-label p {color: grey; margin-bottom: white;}
10
+ .background-white {background-color: white !important; }
11
+ .speech-output-group {padding: 12px;}
12
+ .speech-output-container {min-height: 60px;}
13
+ .speech-output-html {text-align: left; }
14
  """
15
 
16
 
 
34
  with gr.Column(scale=4, min_width=300):
35
  with gr.Row():
36
  with gr.Column(scale=2, min_width=80):
37
+ radio_language = gr.Radio(["de", "en"], label="Language", value="en", elem_id="radio-language-id-element")
38
  with gr.Column(scale=5, min_width=160):
39
  radio_difficulty = gr.Radio(
40
  label="Difficulty",
 
45
  ("medium", 2),
46
  ("hard", 3),
47
  ],
48
+ elem_id="radio-difficulty-id-element",
49
  )
50
  with gr.Column(scale=1, min_width=100):
51
+ btn_random_phrase = gr.Button(value="Choose a random phrase", elem_id="btn-random-phrase-id-element")
52
  with gr.Row():
53
  with gr.Column(scale=7, min_width=300):
54
+ text_student_transcription = gr.Textbox(
55
  lines=3,
56
+ label="Phrase to read for speech recognition",
57
  value="Hi there, how are you?",
58
+ elem_id="text-student-transcription-id-element",
59
  )
60
  with gr.Row():
61
+ audio_tts = gr.Audio(label="Audio TTS", elem_id="audio-tts-id-element")
62
  with gr.Row():
63
  btn_run_tts = gr.Button(value="TTS in browser", elem_id="btn-run-tts-id-element")
64
  btn_run_tts_backend = gr.Button(value="TTS backend", elem_id="btn-run-tts-backend-id-element")
65
+ btn_clear_tts = gr.Button(value="Clear TTS backend", elem_id="btn-clear-tts-backend-id-element")
66
  btn_clear_tts.click(clear, inputs=[], outputs=[audio_tts])
67
  with gr.Row():
68
+ audio_student_recording_stt = gr.Audio(
69
+ label="Speech-to-Text audio output",
70
  sources=["microphone", "upload"],
71
  type="filepath",
72
  show_download_button=True,
73
+ elem_id="audio-student-recording-stt-id-element",
74
  )
75
  with gr.Column(scale=4, min_width=320):
76
  text_transcribed_hidden = gr.Textbox(
 
82
  visible=False,
83
  )
84
  text_recording_ipa = gr.Textbox(
85
+ placeholder=None, label="Student phonetic transcription", elem_id="text-student-recording-ipa-id-element"
86
  )
87
  text_ideal_ipa = gr.Textbox(
88
+ placeholder=None, label="Ideal phonetic transcription", elem_id="text-ideal-ipa-id-element"
89
  )
90
  text_raw_json_output_hidden = gr.Textbox(placeholder=None, label="text_raw_json_output_hidden", visible=False)
91
+ with gr.Group(elem_classes="speech-output-group background-white"):
92
+ gr.Markdown("Speech accuracy output", elem_classes="speech-output-label background-white")
93
+ with gr.Group(elem_classes="speech-output-container background-white"):
94
+ html_output = gr.HTML(
95
+ label="Speech accuracy output",
96
+ elem_id="speech-output",
97
+ show_label=False,
98
+ visible=True,
99
+ render=True,
100
+ value=" - ",
101
+ elem_classes="speech-output-html background-white",
102
+ )
103
  with gr.Row():
104
+ gr.Markdown("### Speech accuracy score (%)", elem_classes="speech-accuracy-score-container row1", elem_id="speech-accuracy-score-container-id-element")
105
  with gr.Row():
106
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col1"):
107
+ number_pronunciation_accuracy = gr.Number(label="Current score", elem_id="number-pronunciation-accuracy-id-element")
108
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col2"):
109
+ number_score_de = gr.Number(label="Global score DE", value=0, interactive=False, elem_id="number-score-de-id-element")
110
  with gr.Column(min_width=100, elem_classes="speech-accuracy-score-container row2 col3"):
111
+ number_score_en = gr.Number(label="Global score EN", value=0, interactive=False, elem_id="number-score-en-id-element")
112
  with gr.Row():
113
+ btn = gr.Button(value="Recognize speech accuracy", elem_id="btn-recognize-speech-accuracy-id-element")
114
+ with gr.Accordion("Click here to expand the table examples", open=True, elem_id="accordion-examples-id-element"):
115
  examples_text = gr.Examples(
116
  examples=[
117
  ["Hallo, wie geht es dir?", "de", 1],
 
121
  ["Die König-Ludwig-Eiche ist ein Naturdenkmal im Staatsbad Brückenau, einem Ortsteil des drei Kilometer nordöstlich gelegenen Bad Brückenau im Landkreis Bad Kissingen in Bayern.", "de", 3],
122
  ["Some machine learning models are designed to understand and generate human-like text based on the input they receive.", "en", 3],
123
  ],
124
+ inputs=[text_student_transcription, radio_language, radio_difficulty],
125
+ elem_id="examples-text-id-element",
126
  )
127
 
128
  def get_updated_score_by_language(text: str, audio_rec: str | Path, lang: str, score_de: float, score_en: float):
 
153
 
154
  btn.click(
155
  get_updated_score_by_language,
156
+ inputs=[text_student_transcription, audio_student_recording_stt, radio_language, number_score_de, number_score_en],
157
  outputs=[
158
  text_transcribed_hidden,
159
  text_letter_correctness,
 
164
  number_score_de, number_score_en
165
  ],
166
  )
167
+ btn_run_tts.click(fn=None, inputs=[text_student_transcription, radio_language], outputs=audio_tts, js=js.js_play_audio)
168
  btn_run_tts_backend.click(
169
  fn=lambdaTTS.get_tts,
170
+ inputs=[text_student_transcription, radio_language],
171
  outputs=audio_tts,
172
  )
173
  btn_random_phrase.click(
174
  lambdaGetSample.get_random_selection,
175
  inputs=[radio_language, radio_difficulty],
176
+ outputs=[text_student_transcription],
177
  )
178
  btn_random_phrase.click(
179
  clear2,
180
  inputs=[],
181
+ outputs=[audio_student_recording_stt, audio_tts]
182
  )
183
  html_output.change(
184
  None,