# test_asr / app.py
import os

import gradio as gr
from transformers import pipeline
# Load the speech-recognition pipeline. The access token is read from the
# environment (e.g. an HF_TOKEN secret) instead of being hardcoded in source.
p = pipeline(
    task="automatic-speech-recognition",
    model="arthoho66/model_005_2000",
    token=os.environ.get("HF_TOKEN"),
    # device="cuda:0",
)
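# A quick sanity check of the pipeline's output shape ("sample.wav" is a
# hypothetical local file, not part of this repo): transformers ASR pipelines
# return a dict with a "text" key, e.g.
#   result = p("sample.wav")
#   print(result["text"])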
def recorded_process(recorded_audio_file) -> str:
    """Transcribe a recorded audio file and return the recognized text."""
    return p(recorded_audio_file)["text"]
def streaming_process(streaming_audio_file) -> str:
    """Transcribe one streamed audio chunk and return the running transcript."""
    global text_streaming
    chunk_text = p(streaming_audio_file)["text"]
    text_streaming = output_streaming(text_streaming, chunk_text)
    return text_streaming

def output_streaming(text_streaming: str, chunk_text: str) -> str:
    """Append the latest chunk's transcription to the accumulated transcript."""
    return text_streaming + chunk_text
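# For example, output_streaming("hello ", "world") returns "hello world".
# If the model's chunk transcriptions do not carry leading spaces, a separator
# could be inserted here; that is an assumption about the model's output.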
def clear_inputs_and_outputs() -> list:
    """Clear all inputs and outputs when the user clicks the "Clear" button."""
    # One None per output component wired to the Clear button below.
    return [None, None]

# Running transcript accumulated across streamed audio chunks.
text_streaming = ""
with gr.Blocks() as demo:
    with gr.Tab("Record File"):
        with gr.Row():
            with gr.Column():
                mic_input = gr.Microphone(type="filepath", label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="Submit")
            with gr.Column():
                lbl_output = gr.Textbox(label="Result")

        clr_btn.click(
            fn=clear_inputs_and_outputs,
            inputs=[],
            outputs=[mic_input, lbl_output],
        )
        sub_btn.click(
            fn=recorded_process,
            inputs=[mic_input],
            outputs=[lbl_output],
        )
with gr.Tab("streaming"):
gr.Interface(
fn=streaming_process,
inputs=[
gr.Microphone(type="filepath", streaming=True)],
outputs=[
# # gr.HighlightedText(label="Result"),
gr.Textbox(type ="text", label="Result",)],
live=True,
allow_flagging="never"
)
with gr.Row():
with gr.Column():
print(text)
text_streaming = output_streaming(text_streaming,text)
gr.Textbox(value=text, label="Result", autofocus=True)
demo.launch()
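# Alternative launch options, sketched as assumptions rather than the author's
# configuration: queue requests so streamed events are processed in order, or
# expose a temporary public link.
#   demo.queue().launch()
#   demo.launch(share=True)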