|
import gradio as gr

from engine.audio_chunk import AudioChunk
from engine.ner_engine import NERInfer
from engine.speech2text import Speech2Text

# Shared engine instances used by the Gradio callbacks.
audio_chunk = AudioChunk()
speech2text = Speech2Text()
ner_tag = NERInfer()

# Most recent transcription produced by the streaming callback.
text = ""

|
def streaming_process(streaming_audio_file) -> str:
    """Transcribe the incoming audio chunk and return the accumulated transcript."""
    global text, text_streaming
    text = speech2text.process_streaming(streaming_audio_file)
    # Append each chunk so the textbox shows the full transcript,
    # not just the latest fragment.
    text_streaming = output_streaming(text_streaming, text)
    return text_streaming


def output_streaming(text_streaming, text01) -> str:
    """Append the newest transcription chunk to the accumulated text."""
    text_streaming += text01
    return text_streaming
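# Example: output_streaming("hello ", "world") -> "hello world".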
|
|
|
def recorded_process(recorded_audio_file) -> dict:
    """Transcribe a microphone recording and tag named entities in the text."""
    text = speech2text.process_microphone(recorded_audio_file)
    result = ner_tag.infer(text)
    return {"text": text, "entities": result}

|
def uploaded_process(uploaded_audio_file) -> dict:
    """Split an uploaded file into chunks, transcribe them, and tag named entities."""
    audio_chunk.chunks_audio(uploaded_audio_file)
    text = speech2text.process_audio()
    result = ner_tag.infer(text)
    return {"text": text, "entities": result}
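# Note: the {"text": ..., "entities": [...]} shape is what gr.HighlightedText
# accepts, assuming NERInfer.infer yields entity dicts with "entity", "start",
# and "end" keys (an assumption about the engine, not shown in this file).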
|
|
|
|
|
def clear_inputs_and_outputs() -> list:
    """Clear all inputs and outputs when the user clicks the "Clear" button."""
    audio_chunk.remove_chunk()
    # One None per output component wired to the Clear button.
    return [None, None]


# Accumulator for the transcript shown in the "Streaming" tab.
text_streaming = ""

|
with gr.Blocks() as demo:
    # Build the Gradio app layout.
    gr.Markdown(
        """
        # Automatic Speech Recognition

        ##### Experience real-time, accurate, and multilingual speech-to-text conversion with our cutting-edge ASR technology.
        """
    )
|
|
|
    with gr.Tab("Record File"):
        with gr.Row():
            with gr.Column():
                mic_input = gr.Microphone(type="filepath", label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="Submit")
            with gr.Column():
                lbl_output = gr.HighlightedText(label="Result")

        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[mic_input, lbl_output],
        )

        sub_btn.click(
            fn=recorded_process,
            inputs=[mic_input],
            outputs=[lbl_output],
        )
|
|
|
    with gr.Tab("Streaming"):
        gr.Interface(
            fn=streaming_process,
            inputs=[gr.Microphone(type="filepath", streaming=True)],
            outputs=[gr.Textbox(type="text", label="Result")],
            live=True,
            allow_flagging="never",
        )
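
    # uploaded_process is defined above but never wired into the UI. The tab
    # below is a minimal sketch that exposes it; the component names
    # (upl_input, upl_sub_btn, ...) are illustrative additions, not from the
    # original code.
    with gr.Tab("Upload File"):
        with gr.Row():
            with gr.Column():
                upl_input = gr.Audio(type="filepath", label="Upload audio file")
                with gr.Row():
                    upl_clr_btn = gr.Button(value="Clear", variant="secondary")
                    upl_sub_btn = gr.Button(value="Submit")
            with gr.Column():
                upl_output = gr.HighlightedText(label="Result")

        upl_clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[upl_input, upl_output],
        )

        upl_sub_btn.click(
            fn=uploaded_process,
            inputs=[upl_input],
            outputs=[upl_output],
        )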
|
|
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=8085)
|
|