# Spaces: Running Running  (appears to be hosting-page status text, not part of the program)
"""A sample to use WebRTC in sendonly mode to transfer audio frames
from the browser to the server and visualize them with matplotlib
and `st.pyplot`."""
import logging | |
import queue | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pydub | |
import streamlit as st | |
from streamlit_webrtc import WebRtcMode, webrtc_streamer | |
from sample_utils.turn import get_ice_servers | |
# Module-level logger named after this module; used below to report why the
# receive loop stops.
logger = logging.getLogger(__name__)
# Start the WebRTC component in send-only mode: the browser sends audio to the
# server and nothing is streamed back.
rtc_configuration = {"iceServers": get_ice_servers()}
media_stream_constraints = {"audio": True}  # request an audio track only
webrtc_ctx = webrtc_streamer(
    key="sendonly-audio",
    mode=WebRtcMode.SENDONLY,
    # NOTE(review): presumably the capacity of the receiver's frame queue --
    # confirm against the streamlit-webrtc documentation.
    audio_receiver_size=256,
    rtc_configuration=rtc_configuration,
    media_stream_constraints=media_stream_constraints,
)
# Placeholder element that the receive loop overwrites with each new figure.
fig_place = st.empty()

# One figure with two stacked axes: waveform on top, spectrum below.
fig, axes = plt.subplots(
    nrows=2,
    ncols=1,
    gridspec_kw={"top": 1.5, "bottom": 0.2},
)
ax_time, ax_freq = axes

# Rolling audio window. The length is passed to pydub as a duration
# (5000 ms = 5 s); the buffer itself is created when the first audio arrives.
sound_window_len = 5000
sound_window_buffer = None
# Main receive loop: pull audio frames from the WebRTC receiver, keep the most
# recent `sound_window_len` milliseconds in `sound_window_buffer`, and redraw
# the waveform and magnitude spectrum of that window on every iteration.
# The loop ends when the receiver is missing or no frame arrives within 1 s.
while True:
    if not webrtc_ctx.audio_receiver:
        logger.warning("AudioReciver is not set. Abort.")
        break

    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except queue.Empty:
        logger.warning("Queue is empty. Abort.")
        break

    # Concatenate all frames received in this batch into one pydub segment.
    sound_chunk = pydub.AudioSegment.empty()
    for audio_frame in audio_frames:
        sound_chunk += pydub.AudioSegment(
            data=audio_frame.to_ndarray().tobytes(),
            sample_width=audio_frame.format.bytes,
            frame_rate=audio_frame.sample_rate,
            channels=len(audio_frame.layout.channels),
        )

    if len(sound_chunk) > 0:
        if sound_window_buffer is None:
            # Seed the window with silence so the plot spans the full window
            # from the very first chunk.
            sound_window_buffer = pydub.AudioSegment.silent(
                duration=sound_window_len
            )

        sound_window_buffer += sound_chunk
        if len(sound_window_buffer) > sound_window_len:
            # Keep only the newest `sound_window_len` milliseconds.
            sound_window_buffer = sound_window_buffer[-sound_window_len:]

    if sound_window_buffer:
        # Ref: https://own-search-and-study.xyz/2017/10/27/python%E3%82%92%E4%BD%BF%E3%81%A3%E3%81%A6%E9%9F%B3%E5%A3%B0%E3%83%87%E3%83%BC%E3%82%BF%E3%81%8B%E3%82%89%E3%82%B9%E3%83%9A%E3%82%AF%E3%83%88%E3%83%AD%E3%82%B0%E3%83%A9%E3%83%A0%E3%82%92%E4%BD%9C/ # noqa
        sound_window_buffer = sound_window_buffer.set_channels(1)  # Stereo to mono
        sample = np.array(sound_window_buffer.get_array_of_samples())
        # Both axes must use the rate of the buffer the samples came from.
        frame_rate = sound_window_buffer.frame_rate

        # Time-domain plot; the x axis runs from -window length up to 0 (now).
        ax_time.cla()
        times = np.arange(-len(sample), 0) / frame_rate
        ax_time.plot(times, sample)
        ax_time.set_xlabel("Time")
        ax_time.set_ylabel("Magnitude")

        # Frequency-domain plot. The frequency axis previously used
        # `sound_chunk.frame_rate`, which is not guaranteed to equal the rate
        # of the buffer whose samples are transformed here.
        spec = np.fft.fft(sample)
        freq = np.fft.fftfreq(sample.shape[0], 1.0 / frame_rate)
        half = sample.shape[0] // 2  # keep the non-negative-frequency half
        freq = freq[:half]
        spec = spec[:half]
        spec[0] = spec[0] / 2  # halve the zero-frequency (DC) term

        ax_freq.cla()
        ax_freq.plot(freq, np.abs(spec))
        ax_freq.set_xlabel("Frequency")
        ax_freq.set_yscale("log")
        ax_freq.set_ylabel("Magnitude")

        fig_place.pyplot(fig)