|
from engine.speech2text import Speech2Text |
|
from engine.ner_engine import NERInfer |
|
from engine.audio_chunk import AudioChunk |
|
import glob |
|
import time |
|
import gradio as gr |
|
import os |
|
|
|
# Engine instances are created once at import time and shared by the
# request handlers defined in this module.
audio_chunk = AudioChunk()
speech2text = Speech2Text()
ner_tag = NERInfer()

# Latest transcript written by the streaming handler (module-level state).
text: str = ""
|
def streaming_process(streaming_audio_file) -> str:
    """Transcribe streamed microphone audio and return the transcript.

    The result is also stored in the module-level ``text`` so the latest
    transcript stays readable outside this function.

    Args:
        streaming_audio_file: Path of the audio handed over by the streaming
            microphone input, or ``None``/empty when no audio is available.

    Returns:
        Whatever ``speech2text.process_streaming`` produces for the audio
        (expected to be the transcript text), or ``""`` when there is no
        audio to transcribe.
    """
    global text
    if not streaming_audio_file:
        # No audio was delivered; skip the engine and report an empty result.
        return ""
    text = speech2text.process_streaming(streaming_audio_file)
    # The value must be returned: the function is used as a Gradio ``fn`` and
    # only its return value reaches the output component.
    return text
|
|
|
|
|
def output_streaming(text_streaming, text01) -> str:
    """Append ``text01`` to ``text_streaming`` and return the combined value."""
    combined = text_streaming
    combined += text01
    return combined
|
|
|
def recorded_process(recorded_audio_file) -> dict:
    """Transcribe a recorded clip and run NER on the transcript.

    Args:
        recorded_audio_file: Path of the recorded audio, or ``None``/empty
            when the user submitted without recording anything.

    Returns:
        A dict with the transcript under ``"text"`` and the output of
        ``ner_tag.infer`` under ``"entities"``. When no audio was supplied,
        an empty result of the same shape is returned so the engines are
        never called with a missing file.
    """
    if not recorded_audio_file:
        return {"text": "", "entities": []}

    transcript = speech2text.process_microphone(recorded_audio_file)
    entities = ner_tag.infer(transcript)
    return {"text": transcript, "entities": entities}
|
|
|
|
|
def uploaded_process(uploaded_audio_file) -> dict:
    """Transcribe an uploaded audio file and run NER on the transcript.

    The file is first handed to ``audio_chunk.chunks_audio``; the transcript
    is then obtained from ``speech2text.process_audio()``, which takes no
    arguments here.

    Args:
        uploaded_audio_file: Path of the uploaded audio, or ``None``/empty
            when the user submitted without choosing a file.

    Returns:
        A dict with the transcript under ``"text"`` and the output of
        ``ner_tag.infer`` under ``"entities"``. When no file was supplied,
        an empty result of the same shape is returned and no chunking or
        transcription is attempted.
    """
    if not uploaded_audio_file:
        return {"text": "", "entities": []}

    # Chunking must happen before process_audio(), which is called without
    # the file path.
    audio_chunk.chunks_audio(uploaded_audio_file)
    transcript = speech2text.process_audio()
    entities = ner_tag.infer(transcript)
    return {"text": transcript, "entities": entities}
|
|
|
|
|
def clear_inputs_and_outputs() -> list:
    """Reset the form when the user presses the "Clear" button.

    Calls ``audio_chunk.remove_chunk()`` and then returns a list of four
    ``None`` values used to blank the components wired as event outputs.
    """
    audio_chunk.remove_chunk()
    return [None] * 4
|
|
|
# Accumulator string for streamed transcripts (the kind of value
# ``output_streaming`` extends). Not wired to any UI event in this layout.
text_streaming = ""

with gr.Blocks() as demo:
    # Build the Gradio app: a header followed by one tab per input mode
    # (recorded clip, live streaming, uploaded file).
    gr.Markdown(
        """
        ![logo](/file=./image/clicknext_logo2x.png)

        # Automatic Speech Recognition

        ##### Experience real-time, accurate, and multilingual speech-to-text conversion with our cutting-edge ASR technology.
        """
    )

    with gr.Tab("Record File"):
        with gr.Row():
            with gr.Column():
                mic_input = gr.Microphone(type="filepath", label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
            with gr.Column():
                lbl_output = gr.HighlightedText(label="Result")

        # Wire the buttons before the names are rebound by the upload tab.
        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[mic_input, lbl_output],
        )
        sub_btn.click(
            fn=recorded_process,
            inputs=[mic_input],
            outputs=[lbl_output],
        )

    with gr.Tab("streaming"):
        # live=True asks Gradio to re-run the handler whenever the input
        # changes, so the textbox follows the streamed audio.
        gr.Interface(
            fn=streaming_process,
            inputs=[gr.Microphone(type="filepath", streaming=True)],
            outputs=[gr.Textbox(type="text", label="Result")],
            live=True,
            allow_flagging="never",
        )
        # No extra textbox is built from the module-level ``text`` here:
        # layout code runs once, before any audio has been processed, so such
        # a box would always be created with an empty value.

    with gr.Tab("Upload File"):
        # Example clips are resolved relative to this file's directory.
        _base_dir = os.path.dirname(__file__)
        _example_files = (
            "examples/politics.mp3",
            "examples/law1.mp3",
            "examples/law2.mp3",
            "examples/law3.mp3",
            "examples/economy.mp3",
            "examples/general.mp3",
        )

        with gr.Row():
            with gr.Column():
                upl_input = gr.Audio(type="filepath", label="Upload a file")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
                gr.Examples(
                    examples=[
                        os.path.join(_base_dir, sample)
                        for sample in _example_files
                    ],
                    inputs=upl_input,
                )
            with gr.Column():
                lbl_output = gr.HighlightedText(label="Result")

        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[upl_input, lbl_output],
        )
        sub_btn.click(
            fn=uploaded_process,
            inputs=[upl_input],
            outputs=[lbl_output],
        )
|
|
|
# Start the web server for the app on port 8085, bound to 0.0.0.0.
demo.launch(
    favicon_path="./image/fe_logo.png",
    server_name="0.0.0.0",
    server_port=8085,
)
|
|