File size: 4,438 Bytes
248b73a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
from engine.speech2text import Speech2Text
from engine.ner_engine import NERInfer
from engine.audio_chunk import AudioChunk
import glob
import time
import gradio as gr
import os
# Shared engine objects, created once at import time and reused by every
# Gradio callback defined below.
audio_chunk = AudioChunk()  # chunks_audio() in uploaded_process, remove_chunk() in clear_inputs_and_outputs
speech2text = Speech2Text()  # exposes process_streaming / process_microphone / process_audio
ner_tag = NERInfer()  # infer(text) result is returned under the "entities" key
# Most recent streaming transcription; streaming_process rebinds it through
# `global text`, and the streaming tab reads it while the UI is being built.
text = ""
def streaming_process(streaming_audio_file) -> str:
    """Transcribe one streamed microphone update and return the text.

    The result is also stored in the module-level ``text`` variable, as the
    original code did, so other module-level code can still read it.

    Args:
        streaming_audio_file: Value delivered by the streaming microphone
            input, which is declared with ``type="filepath"``.

    Returns:
        Whatever ``speech2text.process_streaming`` produced for this update
        (presumably the transcribed string -- confirm against the engine).
    """
    global text
    text = speech2text.process_streaming(streaming_audio_file)
    # The function is wired as the ``fn`` of a live gr.Interface with a
    # Textbox output; without this return the textbox always received None.
    return text
def output_streaming(text_streaming, text01) -> str:
    """Append ``text01`` to ``text_streaming`` and return the combined value."""
    combined = text_streaming
    # Augmented assignment is kept so the semantics match the original for
    # any operand type, not only for str.
    combined += text01
    return combined
def recorded_process(recorded_audio_file) -> dict:
    """Transcribe a microphone recording and tag entities in the transcript.

    Args:
        recorded_audio_file: Value of the "Record voice" microphone input
            (declared with ``type="filepath"``). Gradio delivers ``None``
            when the user submits without having recorded anything.

    Returns:
        A dict with the keys ``"text"`` (the transcription) and
        ``"entities"`` (the result of ``ner_tag.infer``), which is passed to
        the HighlightedText output. With no recording, an empty transcript
        with no entities is returned.
    """
    if recorded_audio_file is None:
        # Nothing to transcribe: skip both engines and show an empty result.
        return {"text": "", "entities": []}
    text = speech2text.process_microphone(recorded_audio_file)
    result = ner_tag.infer(text)
    return {"text": text, "entities": result}
def uploaded_process(uploaded_audio_file) -> dict:
    """Transcribe an uploaded audio file and tag entities in the transcript.

    Args:
        uploaded_audio_file: Value of the "Upload a file" audio input
            (declared with ``type="filepath"``). Gradio delivers ``None``
            when the user submits without having provided a file.

    Returns:
        A dict with the keys ``"text"`` (the transcription) and
        ``"entities"`` (the result of ``ner_tag.infer``), which is passed to
        the HighlightedText output. With no file, an empty transcript with
        no entities is returned.
    """
    if uploaded_audio_file is None:
        # Nothing to chunk or transcribe: show an empty result instead.
        return {"text": "", "entities": []}
    audio_chunk.chunks_audio(uploaded_audio_file)
    # process_audio() takes no argument; presumably it reads the chunks that
    # chunks_audio() just produced -- confirm against the engine module.
    text = speech2text.process_audio()
    result = ner_tag.infer(text)
    return {"text": text, "entities": result}
def clear_inputs_and_outputs() -> list:
    """Reset the UI when the user clicks a "Clear" button.

    Calls ``audio_chunk.remove_chunk()`` and returns four ``None`` values for
    Gradio to assign to the handler's output components.
    """
    audio_chunk.remove_chunk()
    # NOTE(review): both click handlers visible in this file declare only two
    # outputs, while four values are returned here -- confirm the intended
    # length before changing it.
    cleared = [None] * 4
    return cleared
text_streaming = ""

# Build the Gradio app: one tab each for recorded, streaming and uploaded
# audio, then start the server.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation had been stripped -- confirm the layout against the
# running app.
with gr.Blocks() as demo:
    # Page header. The Markdown text is kept flush-left so that it can never
    # be rendered as an indented code block.
    gr.Markdown(
        """
![logo](/file=./image/clicknext_logo2x.png)
# Automatic Speech Recognition
##### Experience real-time, accurate, and multilingual speech-to-text conversion with our cutting-edge ASR technology.
"""
    )

    with gr.Tab("Record File"):
        with gr.Row():
            with gr.Column():
                mic_input = gr.Microphone(type="filepath", label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
            with gr.Column():
                lbl_output = gr.HighlightedText(label="Result")
        # The handlers are wired here, before clr_btn / sub_btn / lbl_output
        # are rebound by the "Upload File" tab further down.
        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[mic_input, lbl_output],
        )
        sub_btn.click(
            fn=recorded_process,
            inputs=[mic_input],
            outputs=[lbl_output],
        )

    with gr.Tab("streaming"):
        gr.Interface(
            fn=streaming_process,
            inputs=[
                gr.Microphone(type="filepath", streaming=True)],
            outputs=[
                gr.Textbox(type="text", label="Result")],
            live=True,
            allow_flagging="never",
        )
        with gr.Row():
            with gr.Column():
                # NOTE(review): these two statements run once, while the UI
                # is being built, when `text` is still "". No event updates
                # this textbox afterwards, so it appears to stay empty --
                # confirm whether it is still wanted.
                text_streaming = output_streaming(text_streaming, text)
                gr.Textbox(value=text, label="Result", autofocus=True)

    with gr.Tab("Upload File"):
        with gr.Row():
            with gr.Column():
                upl_input = gr.Audio(type="filepath", label="Upload a file")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
                # Sample clips resolved relative to this file's directory.
                gr.Examples(
                    examples=[
                        os.path.join(os.path.dirname(__file__), "examples/politics.mp3"),
                        os.path.join(os.path.dirname(__file__), "examples/law1.mp3"),
                        os.path.join(os.path.dirname(__file__), "examples/law2.mp3"),
                        os.path.join(os.path.dirname(__file__), "examples/law3.mp3"),
                        os.path.join(os.path.dirname(__file__), "examples/economy.mp3"),
                        os.path.join(os.path.dirname(__file__), "examples/general.mp3"),
                    ],
                    inputs=upl_input,
                )
            with gr.Column():
                lbl_output = gr.HighlightedText(label="Result")
        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[upl_input, lbl_output],
        )
        sub_btn.click(
            fn=uploaded_process,
            inputs=[upl_input],
            outputs=[lbl_output],
        )

# Listen on all interfaces on port 8085.
demo.launch(favicon_path="./image/fe_logo.png", server_name="0.0.0.0", server_port=8085)
|