Spaces:

EuroPython2022
/

automatic-speech-recognition-with-next-gen-kaldi

Runtime error

App Files Files Community

csukuangfj committed on Jul 19, 2022

Commit

d3fbbd7

1 Parent(s): a97e72d

output rtf

Browse files

Files changed (1) hide show

app.py +92 -9

app.py CHANGED Viewed

@@ -40,13 +40,80 @@ def convert_to_wav(in_filename: str) -> str:
     return out_filename
 def process(
     in_filename: str,
     language: str,
     repo_id: str,
     decoding_method: str,
     num_active_paths: int,
-) -> str:
     logging.info(f"in_filename: {in_filename}")
     logging.info(f"language: {language}")
     logging.info(f"repo_id: {repo_id}")
@@ -88,11 +155,16 @@ def process(
     rtf = (end - start) / duration
     logging.info(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
-    logging.info(f"Duration {duration: .3f} s")
-    logging.info(f"RTF {rtf: .3f}")
     logging.info(f"hyp:\n{hyp}")
-    return hyp
 title = "# Automatic Speech Recognition with Next-gen Kaldi"
@@ -107,6 +179,15 @@ See more information by visiting the following links:
 - <https://github.com/lhotse-speech/lhotse>
 """
 def update_model_dropdown(language: str):
     if language in language_to_models:
@@ -116,7 +197,7 @@ def update_model_dropdown(language: str):
     raise ValueError(f"Unsupported language: {language}")
-demo = gr.Blocks()
 with demo:
     gr.Markdown(title)
@@ -162,6 +243,7 @@ with demo:
             )
             upload_button = gr.Button("Submit for recognition")
             uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
         with gr.TabItem("Record from microphone"):
             microphone = gr.Audio(
@@ -173,9 +255,10 @@ with demo:
             record_button = gr.Button("Submit for recognition")
             recorded_output = gr.Textbox(label="Recognized speech from recordings")
         upload_button.click(
-            process,
             inputs=[
                 uploaded_file,
                 language_radio,
@@ -183,10 +266,10 @@ with demo:
                 decoding_method_radio,
                 num_active_paths_slider,
             ],
-            outputs=uploaded_output,
         )
         record_button.click(
-            process,
             inputs=[
                 microphone,
                 language_radio,
@@ -194,7 +277,7 @@ with demo:
                 decoding_method_radio,
                 num_active_paths_slider,
             ],
-            outputs=recorded_output,
         )
     gr.Markdown(description)

     return out_filename
+def build_html_output(s: str, style: str = "result_item_success"):
+    """Wrap ``s`` in the two nested ``div`` elements used for result messages.
+
+    Args:
+      s: Text or HTML fragment placed inside the inner ``div``. It is
+        interpolated as-is; no HTML escaping is applied here.
+      style: Extra CSS class added next to ``result_item`` on the inner
+        ``div``. Defaults to ``"result_item_success"``.
+
+    Returns:
+      An HTML string: an outer ``div`` with class ``result`` containing one
+      inner ``div`` with classes ``result_item`` and ``style``.
+    """
+    return f"""
+    <div class='result'>
+        <div class='result_item {style}'>
+          {s}
+        </div>
+    </div>
+    """
+def process_uploaded_file(
+    in_filename: str,
+    language: str,
+    repo_id: str,
+    decoding_method: str,
+    num_active_paths: int,
+):
+    """Click handler for the upload tab: validate the input, then run ``process``.
+
+    All arguments are forwarded unchanged to ``process`` by keyword.
+
+    Returns:
+      A pair ``(text, html)``. When ``in_filename`` is ``None`` or empty, or
+      when ``process`` raises, ``text`` is ``""`` and ``html`` is an error
+      box built with the ``"result_item_error"`` style. Otherwise the return
+      value of ``process`` is passed through as-is.
+    """
+    # Nothing was uploaded: tell the user what to do instead of failing.
+    if in_filename is None or in_filename == "":
+        return "", build_html_output(
+            "Please first upload a file and then click "
+            'the button "submit for recognition"',
+            "result_item_error",
+        )
+    logging.info(f"Processing uploaded file: {in_filename}")
+    try:
+        return process(
+            in_filename=in_filename,
+            language=language,
+            repo_id=repo_id,
+            decoding_method=decoding_method,
+            num_active_paths=num_active_paths,
+        )
+    except Exception as e:
+        # Any failure is reported in the HTML output rather than propagated.
+        # NOTE(review): str(e) is inserted into the HTML without escaping;
+        # consider html.escape() if the message can contain user input.
+        logging.info(str(e))
+        return "", build_html_output(str(e), "result_item_error")
+def process_microphone(
+    in_filename: str,
+    language: str,
+    repo_id: str,
+    decoding_method: str,
+    num_active_paths: int,
+):
+    """Click handler for the microphone tab: validate the input, then run ``process``.
+
+    All arguments are forwarded unchanged to ``process`` by keyword.
+
+    Returns:
+      A pair ``(text, html)``. When ``in_filename`` is ``None`` or empty, or
+      when ``process`` raises, ``text`` is ``""`` and ``html`` is an error
+      box built with the ``"result_item_error"`` style. Otherwise the return
+      value of ``process`` is passed through as-is.
+    """
+    # Nothing was recorded: explain the recording steps instead of failing.
+    if in_filename is None or in_filename == "":
+        return "", build_html_output(
+            "Please first click 'Record from microphone', speak, "
+            "click 'Stop recording', and then "
+            "click the button 'submit for recognition'",
+            "result_item_error",
+        )
+    logging.info(f"Processing microphone: {in_filename}")
+    try:
+        return process(
+            in_filename=in_filename,
+            language=language,
+            repo_id=repo_id,
+            decoding_method=decoding_method,
+            num_active_paths=num_active_paths,
+        )
+    except Exception as e:
+        # Any failure is reported in the HTML output rather than propagated.
+        # NOTE(review): str(e) is inserted into the HTML without escaping;
+        # consider html.escape() if the message can contain user input.
+        logging.info(str(e))
+        return "", build_html_output(str(e), "result_item_error")
 def process(
     in_filename: str,
     language: str,
     repo_id: str,
     decoding_method: str,
     num_active_paths: int,
+):
     logging.info(f"in_filename: {in_filename}")
     logging.info(f"language: {language}")
     logging.info(f"repo_id: {repo_id}")
     rtf = (end - start) / duration
     logging.info(f"Finished at {date_time} s. Elapsed: {end - start: .3f} s")
+    info = f"""
+    Wave duration  : {duration: .3f} s <br/>
+    Processing time: {end - start: .3f} s <br/>
+    RTF: {end - start: .3f}/{duration: .3f} = {(end - start)/duration:.3f} <br/>
+    """
+    logging.info(info)
     logging.info(f"hyp:\n{hyp}")
+    return hyp, build_html_output(info)
 title = "# Automatic Speech Recognition with Next-gen Kaldi"
 - <https://github.com/lhotse-speech/lhotse>
 """
+# css style is copied from
+# https://huggingface.co/spaces/alphacep/asr/blob/main/app.py#L113
+#
+# These class names are the ones emitted by build_html_output():
+# `result` / `result_item` on the wrapper divs, plus either
+# `result_item_success` or `result_item_error` as the style class.
+css = """
+.result {display:flex;flex-direction:column}
+.result_item {padding:15px;margin-bottom:8px;border-radius:15px;width:100%}
+.result_item_success {background-color:mediumaquamarine;color:white;align-self:start}
+.result_item_error {background-color:#ff7070;color:white;align-self:start}
+"""
 def update_model_dropdown(language: str):
     if language in language_to_models:
     raise ValueError(f"Unsupported language: {language}")
+demo = gr.Blocks(css=css)
 with demo:
     gr.Markdown(title)
             )
             upload_button = gr.Button("Submit for recognition")
             uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
+            uploaded_html_info = gr.HTML(label="Info")
         with gr.TabItem("Record from microphone"):
             microphone = gr.Audio(
             record_button = gr.Button("Submit for recognition")
             recorded_output = gr.Textbox(label="Recognized speech from recordings")
+            recorded_html_info = gr.HTML(label="Info")
         upload_button.click(
+            process_uploaded_file,
             inputs=[
                 uploaded_file,
                 language_radio,
                 decoding_method_radio,
                 num_active_paths_slider,
             ],
+            outputs=[uploaded_output, uploaded_html_info],
         )
         record_button.click(
+            process_microphone,
             inputs=[
                 microphone,
                 language_radio,
                 decoding_method_radio,
                 num_active_paths_slider,
             ],
+            outputs=[recorded_output, recorded_html_info],
         )
     gr.Markdown(description)