"""Gradio demo app: speech-to-text with named-entity highlighting.

Three tabs share the same engines:
  * "Record File"  — transcribe a recorded microphone clip, tag entities.
  * "streaming"    — live transcription of streaming microphone audio.
  * "Upload File"  — chunk an uploaded audio file, transcribe, tag entities.
"""
from engine.speech2text import Speech2Text
from engine.ner_engine import NERInfer
from engine.audio_chunk import AudioChunk

import os

import gradio as gr

# Engine instances shared by every tab/handler.
audio_chunk = AudioChunk()
speech2text = Speech2Text()
ner_tag = NERInfer()

# Last streaming transcription (updated by streaming_process).
text = ""


def streaming_process(streaming_audio_file) -> str:
    """Transcribe one streaming audio chunk and return the text.

    Bug fix: the original had ``return text`` commented out, so the live
    ``gr.Interface`` Textbox output was never updated — the transcription
    must be returned for Gradio to display it.
    """
    global text
    text = speech2text.process_streaming(streaming_audio_file)
    return text


def output_streaming(text_streaming, text01) -> str:
    """Append a new transcription fragment and return the accumulated text."""
    text_streaming += text01
    return text_streaming


def recorded_process(recorded_audio_file) -> dict:
    """Transcribe a recorded microphone file and tag named entities.

    Returns the dict shape ``gr.HighlightedText`` expects:
    ``{"text": <transcription>, "entities": <NER spans>}``.
    """
    recognized = speech2text.process_microphone(recorded_audio_file)
    entities = ner_tag.infer(recognized)
    return {"text": recognized, "entities": entities}


def uploaded_process(uploaded_audio_file) -> dict:
    """Chunk an uploaded audio file, transcribe it, and tag named entities.

    Returns the same ``{"text": ..., "entities": ...}`` dict as
    ``recorded_process`` for the HighlightedText output.
    """
    audio_chunk.chunks_audio(uploaded_audio_file)
    recognized = speech2text.process_audio()
    entities = ner_tag.infer(recognized)
    return {"text": recognized, "entities": entities}


def clear_inputs_and_outputs() -> list:
    """Clear inputs and outputs when the user clicks the "Clear" button.

    Also removes any temporary audio chunks from the upload pipeline.

    Bug fix: both ``clr_btn.click`` wirings list exactly two output
    components (audio input + HighlightedText), so exactly two values must
    be returned — the original returned four, a return/output count
    mismatch.
    """
    audio_chunk.remove_chunk()
    return [None, None]


# Build the Gradio app: header banner, then one tab per input mode.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    ![logo](/file=./image/clicknext_logo2x.png)
    # Automatic Speech Recognition
    ##### Experience real-time, accurate, and multilingual speech-to-text conversion with our cutting-edge ASR technology.
    """
    )

    with gr.Tab("Record File"):
        with gr.Row():
            with gr.Column():
                mic_input = gr.Microphone(type="filepath", label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
            with gr.Column():
                lbl_output = gr.HighlightedText(label="Result")

        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[mic_input, lbl_output],
        )
        sub_btn.click(
            fn=recorded_process,
            inputs=[mic_input],
            outputs=[lbl_output],
        )

    with gr.Tab("streaming"):
        gr.Interface(
            fn=streaming_process,
            inputs=[gr.Microphone(type="filepath", streaming=True)],
            outputs=[gr.Textbox(type="text", label="Result")],
            live=True,
            allow_flagging="never",
        )
        with gr.Row():
            with gr.Column():
                # NOTE(review): built once at startup with the (empty)
                # module-level `text`, so it never updates; kept for layout
                # parity — consider wiring it to an event or removing it.
                gr.Textbox(value=text, label="Result", autofocus=True)

    with gr.Tab("Upload File"):
        with gr.Row():
            with gr.Column():
                upl_input = gr.Audio(type="filepath", label="Upload a file")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
                # Example clips live next to this script.
                example_dir = os.path.dirname(__file__)
                gr.Examples(
                    examples=[
                        os.path.join(example_dir, "examples/politics.mp3"),
                        os.path.join(example_dir, "examples/law1.mp3"),
                        os.path.join(example_dir, "examples/law2.mp3"),
                        os.path.join(example_dir, "examples/law3.mp3"),
                        os.path.join(example_dir, "examples/economy.mp3"),
                        os.path.join(example_dir, "examples/general.mp3"),
                    ],
                    inputs=upl_input,
                )
            with gr.Column():
                lbl_output = gr.HighlightedText(label="Result")

        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[upl_input, lbl_output],
        )
        sub_btn.click(
            fn=uploaded_process,
            inputs=[upl_input],
            outputs=[lbl_output],
        )

demo.launch(
    favicon_path="./image/fe_logo.png",
    server_name="0.0.0.0",
    server_port=8085,
)