Spaces:

arthoho66
/

test_asr

Sleeping

File size: 2,381 Bytes

af8c3ac
248b73a
af8c3ac
 
 
 
 
 
 
248b73a
 
 
 
 
 
af8c3ac
 
248b73a
 
af8c3ac
 
 
 
248b73a
af8c3ac
 
 
248b73a
 
 
 
 
 
 
 
af8c3ac
248b73a
 
 
 
 
 
 
 
 
 
 
af8c3ac
248b73a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65ced0b

import os
import time

import gradio as gr
from transformers import pipeline
# Build the speech-to-text pipeline once at import time so that every request
# handler below reuses the same loaded model.
#
# SECURITY: the Hugging Face access token is read from the HF_TOKEN
# environment variable (for example a Space secret) instead of being committed
# in source. Any token that was previously hard-coded here must be treated as
# leaked and revoked. When HF_TOKEN is unset, None is passed for `token`.
p = pipeline(
    task="automatic-speech-recognition",
    model="arthoho66/model_005_2000",
    token=os.environ.get("HF_TOKEN"),
    # device="cuda:0",
)

# Most recent transcript written by streaming_process (module-level state).
text = ""
def recorded_process(recorded_audio_file) -> str:
    """Transcribe a finished recording with the module-level pipeline ``p``.

    Args:
        recorded_audio_file: The audio input forwarded to ``p``; ``None``
            when no recording was provided.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string when
        ``recorded_audio_file`` is ``None``.
    """
    # Submitting without a recording passes None; skip the model call rather
    # than letting the pipeline fail on a missing input.
    if recorded_audio_file is None:
        return ""
    return p(recorded_audio_file)["text"]


def streaming_process(streaming_audio_file) -> str:
    """Transcribe streamed audio and remember the result in the global ``text``.

    Args:
        streaming_audio_file: The audio input forwarded to the module-level
            pipeline ``p``; ``None`` when no audio is available.

    Returns:
        The ``"text"`` entry of the pipeline result, or an empty string when
        ``streaming_audio_file`` is ``None``. In the ``None`` case the global
        ``text`` is left unchanged.
    """
    global text
    # No audio yet (or the input was cleared): skip the model call instead of
    # letting the pipeline fail on a missing input.
    if streaming_audio_file is None:
        return ""
    text = p(streaming_audio_file)["text"]
    return text

def output_streaming(text_streaming, text01) -> str:
    """Return ``text_streaming`` with ``text01`` appended to its end."""
    combined = text_streaming + text01
    return combined

def clear_inputs_and_outputs() -> list:
    """Reset the "Record File" tab when the user clicks the "Clear" button.

    Returns:
        A list with one ``None`` per output component bound to the Clear
        button's click handler (the microphone input and the result textbox).
    """
    # The previous implementation called `audio_chunk.remove_chunk()`, but no
    # `audio_chunk` is defined or imported in this file, so every click raised
    # NameError. It also returned four values for two bound outputs.
    return [None, None]


# Accumulator passed to output_streaming below; starts empty.
text_streaming = ""

# Build the two-tab UI: one tab for transcribing a finished recording and one
# for transcribing streamed microphone input.
with gr.Blocks() as demo:
    with gr.Tab("Record File"):
        with gr.Row():  
            # Left column: microphone input plus the Clear / submit buttons.
            with gr.Column():
                mic_input = gr.Microphone( type="filepath",label="Record voice")
                with gr.Row():
                    clr_btn = gr.Button(value="Clear", variant="secondary")
                    sub_btn = gr.Button(value="submit")
            # Right column: textbox that receives the transcript.
            with gr.Column():
                lbl_output = gr.Textbox(label="Result")

            # Clear writes the handler's return values into the microphone
            # input and the result textbox (two output components).
            clr_btn.click(
                fn=clear_inputs_and_outputs,
                    inputs=[],
                    outputs=[mic_input, lbl_output]
            )

            # submit sends the microphone value to recorded_process and shows
            # its return value in the result textbox.
            sub_btn.click(
                fn=recorded_process,
                inputs=[mic_input],
                outputs=[lbl_output]
            )

    with gr.Tab("streaming"):
        # Nested Interface wiring a streaming microphone to streaming_process.
        # NOTE(review): live=True presumably re-runs fn as the input changes;
        # confirm against the Gradio version in use.
        gr.Interface(
                    fn=streaming_process,
                    inputs=[
                        gr.Microphone(type="filepath", streaming=True)],
                    outputs=[
                    #     # gr.HighlightedText(label="Result"),
                        gr.Textbox(type ="text", label="Result",)],
                    live=True,
                    allow_flagging="never"
                    )
        with gr.Row():  
            with gr.Column():
                # NOTE(review): these three statements run once, while the
                # layout is being built and before demo.launch(). At that
                # point the global `text` is still "", so the print shows an
                # empty line and the textbox is created with an empty value.
                # The textbox is not bound to any event in this file, so it
                # looks like it never shows later transcripts; confirm intent.
                print(text)
                text_streaming = output_streaming(text_streaming,text)
                gr.Textbox(value=text, label="Result", autofocus=True)

# Start the Gradio server for the assembled app.
demo.launch()