Spaces:

sanchit-gandhi
/

audio-streaming

Running

App Files Files Community

sanchit-gandhi HF staff committed on May 29

Commit

bdf2bf2

•

1 Parent(s): af4cf59

try bytes

Browse files

Files changed (2) hide show

app.py +34 -3
librispeech.mp3 +0 -0

app.py CHANGED Viewed

@@ -1,6 +1,35 @@
 import gradio as gr
 import math
 import time
 def stream(audio, chunk_length_s):
     start_time = time.time()
@@ -15,10 +44,11 @@ def stream(audio, chunk_length_s):
         start_pos = idx * chunk_length
         end_pos = min((idx + 1) * chunk_length, audio_length)
         chunk = array[start_pos : end_pos]
         if idx == 0:
             first_time = round(time.time() - start_time, 2)
         run_time = round(time.time() - start_time, 2)
-        yield (sampling_rate, chunk), first_time, run_time
 with gr.Blocks() as demo:
     with gr.Row():
@@ -27,10 +57,11 @@ with gr.Blocks() as demo:
             chunk_length = gr.Slider(minimum=2, maximum=10, value=2, step=2, label="Chunk length (s)")
             run_button = gr.Button("Stream audio")
         with gr.Column():
-            audio_out = gr.Audio(streaming=True, autoplay=True)
             first_time = gr.Textbox(label="Time to first chunk (s)")
             run_time = gr.Textbox(label="Time to current chunk (s)")
-    run_button.click(fn=stream, inputs=[audio_in, chunk_length], outputs=[audio_out, first_time, run_time])
 demo.launch()

 import gradio as gr
 import math
 import time
+import numpy as np
+from pydub import AudioSegment
+import io
+def numpy_to_mp3(audio_array, sampling_rate):
+    # Normalize audio_array if it's floating-point
+    if np.issubdtype(audio_array.dtype, np.floating):
+        max_val = np.max(np.abs(audio_array))
+        audio_array = (audio_array / max_val) * 32767  # Normalize to 16-bit range
+        audio_array = audio_array.astype(np.int16)
+    # Create an audio segment from the numpy array
+    audio_segment = AudioSegment(
+        audio_array.tobytes(),
+        frame_rate=sampling_rate,
+        sample_width=audio_array.dtype.itemsize,
+        channels=1
+    )
+    # Export the audio segment to MP3 bytes
+    mp3_io = io.BytesIO()
+    audio_segment.export(mp3_io, format="mp3")
+    # Get the MP3 bytes
+    mp3_bytes = mp3_io.getvalue()
+    mp3_io.close()
+    return mp3_bytes
 def stream(audio, chunk_length_s):
     start_time = time.time()
         start_pos = idx * chunk_length
         end_pos = min((idx + 1) * chunk_length, audio_length)
         chunk = array[start_pos : end_pos]
+        chunk_mp3 = numpy_to_mp3(chunk, sampling_rate=sampling_rate)
         if idx == 0:
             first_time = round(time.time() - start_time, 2)
         run_time = round(time.time() - start_time, 2)
+        yield (sampling_rate, chunk), chunk_mp3, first_time, run_time
 with gr.Blocks() as demo:
     with gr.Row():
             chunk_length = gr.Slider(minimum=2, maximum=10, value=2, step=2, label="Chunk length (s)")
             run_button = gr.Button("Stream audio")
         with gr.Column():
+            audio_out = gr.Audio(streaming=True, autoplay=True, label="wav")
+            audio_out_mp3 = gr.Audio(streaming=True, autoplay=True, format="mp3", label="mp3")
             first_time = gr.Textbox(label="Time to first chunk (s)")
             run_time = gr.Textbox(label="Time to current chunk (s)")
+    run_button.click(fn=stream, inputs=[audio_in, chunk_length], outputs=[audio_out, audio_out_mp3, first_time, run_time])
 demo.launch()

librispeech.mp3 ADDED Viewed

Binary file (627 kB). View file