"""Gradio demo that streams an uploaded audio clip back in fixed-length chunks."""

import math
import time

import gradio as gr


def stream(audio, chunk_length_s):
    """Yield ``audio`` back chunk by chunk together with timing information.

    Args:
        audio: ``(sampling_rate, array)`` pair as delivered by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when no audio is
            loaded. ``array`` only needs to support ``len()`` and slicing
            (presumably a NumPy array -- confirm against the Gradio version
            in use).
        chunk_length_s: Length of each streamed chunk, in seconds.

    Yields:
        ``((sampling_rate, chunk), first_time, run_time)`` where ``chunk`` is
        the next slice of ``array``, ``first_time`` is the number of seconds
        (rounded to 2 decimals) that elapsed before the first chunk was
        yielded, and ``run_time`` is the number of seconds elapsed before the
        current chunk was yielded.

    Raises:
        gr.Error: If no audio was provided, or if the chunk length works out
            to less than one sample.
    """
    # Monotonic clock: elapsed-time measurements must not be affected by
    # system clock adjustments.
    start_time = time.perf_counter()

    if audio is None:
        raise gr.Error("Please provide an audio file before streaming.")

    sampling_rate, array = audio
    chunk_length = int(chunk_length_s * sampling_rate)
    if chunk_length < 1:
        # Guards the division below against a zero (or negative) chunk size.
        raise gr.Error("Chunk length must cover at least one audio sample.")

    # Always stream outputs faster than it takes to process: each chunk is
    # released after half of its own playback duration.
    time_length = chunk_length_s / 2

    audio_length = len(array)
    num_batches = math.ceil(audio_length / chunk_length)

    first_time = None
    for idx in range(num_batches):
        time.sleep(time_length)

        start_pos = idx * chunk_length
        end_pos = min(start_pos + chunk_length, audio_length)
        chunk = array[start_pos:end_pos]

        run_time = round(time.perf_counter() - start_time, 2)
        if first_time is None:
            # Latency of the very first chunk; reported unchanged afterwards.
            first_time = run_time

        yield (sampling_rate, chunk), first_time, run_time


with gr.Blocks() as demo:
    with gr.Row():
        # Left column: input audio, chunk-length control and trigger button.
        with gr.Column():
            audio_in = gr.Audio(
                value="librispeech.wav", sources=["upload"], type="numpy"
            )
            chunk_length = gr.Slider(
                minimum=1, maximum=3, value=2, step=1, label="Chunk length (s)"
            )
            run_button = gr.Button("Stream audio")
        # Right column: streamed playback and timing read-outs.
        with gr.Column():
            audio_out = gr.Audio(streaming=True, autoplay=True)
            first_time = gr.Textbox(label="Time to first chunk (s)")
            run_time = gr.Textbox(label="Time to current chunk (s)")

    # Each value yielded by ``stream`` updates the three outputs in order.
    run_button.click(
        fn=stream,
        inputs=[audio_in, chunk_length],
        outputs=[audio_out, first_time, run_time],
    )

demo.launch()