Spaces:
Runtime error
Runtime error
"""Create spectrograms from audio files using matplotlib""" | |
import logging | |
from dataclasses import dataclass | |
from pathlib import Path | |
from typing import Tuple, Union | |
import matplotlib as mpl | |
mpl.use('Agg') | |
import librosa | |
import librosa.display | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import scipy.signal as signal | |
# Root logger setup: INFO and above, each record prefixed with a timestamp.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s: %(message)s',
)
@dataclass
class FFTConfig:
    """Settings that control how a spectrogram is computed and rendered.

    Declared as a dataclass so individual settings can be overridden at
    construction time, e.g. ``FFTConfig(n_fft=1024, mel=True)``. Calling
    ``FFTConfig()`` with no arguments yields the defaults listed below.
    """

    # --- STFT parameters (forwarded to ``librosa.stft`` by ``calc_stft``) ---
    n_fft: Union[int, None] = 2**12
    win_length: Union[int, None] = None
    hop_length: int = 512

    # Sample rate; ``plot_spec`` overwrites this with the rate of its input.
    sr: int = 22_050

    # Convert the spectrogram to decibels before plotting.
    db: bool = False

    # Plot a mel spectrogram (``librosa.feature.melspectrogram``) instead of
    # the plain STFT magnitude.
    mel: bool = False

    # Frequency bounds passed to the mel filter bank and to ``specshow``.
    fmin: int = 50
    fmax: int = 10_000

    # ``y_axis`` argument for ``librosa.display.specshow``.
    y_axis: str = 'linear'

    # NOTE(review): not read by any function visible in this module.
    denoise: Union[str, None] = None

    # Apply ``librosa.pcen`` to the STFT magnitude.
    pcen: bool = False

    # Matplotlib colormap name used by ``specshow``.
    cmap: str = 'magma'

    # Number of mel bands when ``mel`` is enabled.
    n_mels: int = 128

    # Colour-scale limits passed to ``specshow``; ``None`` leaves them unset.
    vmin: Union[float, None] = None
    vmax: Union[float, None] = None

    # Run the audio through ``fish_filter`` before analysis.
    bandpass: bool = True

    # y-axis limits applied to the plot; ``None`` skips ``set_ylim``.
    ylim: Union[Tuple[float, float], None] = (0, 512)
def load_wav(fpath):
    """Load an audio file and trim it with ``librosa.effects.trim``.

    Args:
        fpath: Path of the audio file, handed to ``librosa.load`` with its
            default settings.

    Returns:
        A ``(audio, sr)`` tuple: the trimmed samples and the sample rate
        reported by ``librosa.load``.
    """
    samples, sample_rate = librosa.load(fpath)
    # ``trim`` returns (trimmed_signal, index_interval); only the signal is kept.
    trimmed = librosa.effects.trim(samples)[0]
    return trimmed, sample_rate
def calc_stft(audio, fft_config):
    """Return the magnitude of the short-time Fourier transform of ``audio``.

    Args:
        audio: Samples passed straight to ``librosa.stft``.
        fft_config: Object providing ``n_fft``, ``hop_length`` and
            ``win_length``.

    Returns:
        The element-wise absolute value of the complex STFT.
    """
    stft_kwargs = {
        'n_fft': fft_config.n_fft,
        'hop_length': fft_config.hop_length,
        'win_length': fft_config.win_length,
    }
    complex_spec = librosa.stft(audio, **stft_kwargs)
    return np.abs(complex_spec)
def plot_spec(inp, output, fft_config: FFTConfig):
    """Render a spectrogram of ``inp`` and optionally save it to ``output``.

    Args:
        inp: ``(sr, audio)`` pair. Note the order: sample rate first, samples
            second -- the reverse of what ``librosa.load`` returns.
        output: Destination handed to ``Figure.savefig``. Nothing is written
            when it is falsy.
        fft_config: Spectrogram settings. ``fft_config.sr`` is overwritten
            with the sample rate from ``inp`` (retained for backward
            compatibility); no other field is modified, so the same config
            can be reused across calls.

    Returns:
        None. The figure is always closed before returning, including when
        plotting or saving raises.
    """
    # The input tuple is (sr, audio) -- the opposite of librosa's (audio, sr).
    sr, audio = inp
    fft_config.sr = sr

    if fft_config.bandpass:
        audio = fish_filter(audio, fs=sr)

    # librosa's audio validation requires floating-point samples. The filter
    # above already yields floats, but with ``bandpass`` disabled integer
    # PCM input would otherwise be rejected.
    audio = np.asarray(audio)
    if not np.issubdtype(audio.dtype, np.floating):
        audio = audio.astype(np.float64)

    # Track dB conversion locally instead of writing ``fft_config.db``: the
    # flag used to stick on the caller's config and leak into later calls.
    use_db = fft_config.db
    is_power = False

    if fft_config.mel:
        # The mel spectrogram replaces the STFT entirely (also when ``pcen``
        # is set), so the STFT is not computed on this path.
        # NOTE(review): n_fft / hop_length / win_length from the config are
        # not forwarded here, so librosa's defaults apply -- confirm intent.
        spec = librosa.feature.melspectrogram(
            y=audio,
            sr=sr,
            n_mels=fft_config.n_mels,
            fmin=fft_config.fmin,
            fmax=fft_config.fmax
        )
        # melspectrogram yields a *power* spectrogram by default; it is
        # always displayed in dB.
        use_db = True
        is_power = True
    else:
        spec = calc_stft(audio, fft_config)
        if fft_config.pcen:
            # Scale PCEN: https://librosa.org/doc/latest/generated/librosa.pcen.html?highlight=pcen#librosa.pcen
            spec = librosa.pcen(spec * (2**31), sr=sr, hop_length=fft_config.hop_length)
            use_db = True

    if use_db:
        # Power needs 10*log10, amplitude needs 20*log10; using the amplitude
        # conversion on a power spectrogram would double every dB value.
        to_db = librosa.power_to_db if is_power else librosa.amplitude_to_db
        spec = to_db(spec, ref=np.max)

    fig, ax = plt.subplots(1, 1)
    try:
        librosa.display.specshow(
            spec,
            sr=sr,
            hop_length=fft_config.hop_length,
            x_axis='time',
            y_axis=fft_config.y_axis,
            fmin=fft_config.fmin,
            fmax=fft_config.fmax,
            cmap=fft_config.cmap,
            ax=ax,
            vmin=fft_config.vmin,
            vmax=fft_config.vmax
        )
        ax.set_axis_off()
        if fft_config.ylim is not None:
            ax.set_ylim(fft_config.ylim)
        if output:
            fig.savefig(output, bbox_inches='tight', pad_inches=0)
    finally:
        # Close only the figure created here, and do so even on failure, so
        # figures are neither leaked nor closed on behalf of other code.
        plt.close(fig)
def fish_filter(call, low=50, high=512, order=8, fs=22_050):
    """Band-pass ``call`` with a Butterworth filter.

    Args:
        call: Signal to filter; passed to ``scipy.signal.sosfilt``.
        low: Lower edge of the pass band, in the same units as ``fs``.
        high: Upper edge of the pass band, in the same units as ``fs``.
        order: Order argument given to ``scipy.signal.butter``.
        fs: Sampling frequency of ``call``.

    Returns:
        The filtered signal produced by ``sosfilt``.
    """
    passband = [low, high]
    # Second-order sections are requested from ``butter`` and applied as-is.
    sections = signal.butter(order, passband, btype='bandpass', output='sos', fs=fs)
    filtered = signal.sosfilt(sections, call)
    return filtered