Spaces:
Sleeping
Sleeping
import librosa.display | |
import matplotlib | |
import matplotlib.pyplot as plt | |
import matplotlib.style as ms | |
from PIL import Image | |
import matplotlib.ticker as ticker | |
from librosa.feature import melspectrogram | |
from python.util.time_util import int_to_min_sec | |
import librosa | |
import librosa.display | |
import numpy as np | |
from io import BytesIO | |
# Use the non-interactive Agg backend: figures are rendered from worker
# threads, where starting a GUI backend is unsafe. See
# https://stackoverflow.com/questions/69924881/userwarning-starting-a-matplotlib-gui-outside-of-the-main-thread-will-likely-fa
matplotlib.use('agg')

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# 3.8 removed the old aliases, so pick whichever name this installation
# provides. The style is cosmetic: if neither exists the default is kept.
for _style_name in ('seaborn-v0_8-muted', 'seaborn-muted'):
    if _style_name in ms.available:
        ms.use(_style_name)
        break
def update_ticks(x, pos, sample_rate=16000):
    """Format a tick position given in samples as a time label.

    The ``(x, pos)`` leading signature is the one
    ``matplotlib.ticker.FuncFormatter`` calls its function with.

    Args:
        x: Tick location, expressed as a sample index.
        pos: Tick position supplied by matplotlib; unused.
        sample_rate: Samples per second used to convert ``x`` to seconds.
            Defaults to 16000, the value that was previously hard-coded.

    Returns:
        Whatever ``int_to_min_sec`` returns for the tick's time in seconds
        (presumably a minutes:seconds label -- confirm against that helper).
    """
    which_second = x / sample_rate
    return int_to_min_sec(which_second)
def plt_line(y_points, sample_rate=16000):
    """Render a waveform as a line plot and return it as a PIL image.

    Args:
        y_points: Sequence of sample values to plot; the x axis is the
            sample index.
        sample_rate: Samples per second, used to label the x axis in time
            units instead of sample indices.

    Returns:
        A ``PIL.Image.Image`` decoded from the PNG rendering of the figure.
    """
    fig, ax = plt.subplots()
    try:
        # Convert sample-index ticks to time labels using the caller's rate
        # (previously the rate was fixed at 16000 regardless of the argument).
        ax.xaxis.set_major_formatter(
            ticker.FuncFormatter(lambda x, pos: int_to_min_sec(x / sample_rate))
        )
        ax.plot(y_points)
        ax.set_title('Waveform')

        # Render the figure into an in-memory PNG.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call leaks a figure.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)
# plt mfcc, https://www.cnblogs.com/LXP-Never/p/10918590.html
def plt_mfcc(single_channel, sample_rate):
    """Render the log-scaled mel spectrogram of a signal as a PIL image.

    Args:
        single_channel: Audio samples passed to ``melspectrogram`` as ``y``.
        sample_rate: Sampling rate of ``single_channel`` in Hz.

    Returns:
        A ``PIL.Image.Image`` decoded from the PNG rendering of the figure.
    """
    hop_length = 512
    mel_spec = melspectrogram(
        y=single_channel,
        sr=sample_rate,
        n_fft=1024,
        hop_length=hop_length,
        n_mels=128,
    )
    log_mel_spec = librosa.power_to_db(mel_spec)

    fig, ax = plt.subplots()
    try:
        # Pass the same hop length used above so the time axis is scaled
        # consistently with the spectrogram frames.
        librosa.display.specshow(
            log_mel_spec,
            sr=sample_rate,
            hop_length=hop_length,
            x_axis='time',
            y_axis='mel',
            ax=ax,
        )
        # The colour bar on the right-hand side is intentionally not drawn.
        # NOTE(review): the title says 'MFCC' but the plot shows a log-mel
        # spectrogram; the label is kept unchanged for existing consumers.
        ax.set_title('MFCC')

        # Render the figure into an in-memory PNG.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)
# https://gist.github.com/stevemclaugh/80f192130852353ad53e6d8b6b275983
def plt_mfcc2(wav_pathname, sample_rate):
    """Plot the second-order MFCC deltas of an audio file as a PIL image.

    Args:
        wav_pathname: Path of the audio file, passed to ``librosa.load``.
        sample_rate: Rate in Hz the audio is loaded at; every later step
            uses the rate returned by ``librosa.load``.

    Returns:
        A ``PIL.Image.Image`` decoded from the PNG rendering of the figure.
    """
    # Load at the requested rate so the samples and the rate handed to the
    # feature/plot calls agree (librosa.load otherwise resamples to its own
    # default rate, which skews the time and frequency axes).
    y, sr = librosa.load(wav_pathname, sr=sample_rate)

    # Mel-scaled power (energy-squared) spectrogram.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

    # Convert to log scale (dB), using the peak power as reference.
    # S is a power spectrogram, so power_to_db is the matching conversion;
    # amplitude_to_db would square the values a second time.
    log_S = librosa.power_to_db(S, ref=np.max)

    # First 13 Mel-frequency cepstral coefficients and their second delta.
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    # Only the delta-delta plot is returned, so it is the only figure built.
    fig, ax = plt.subplots()
    try:
        mesh = librosa.display.specshow(delta2_mfcc, sr=sr, x_axis='time', ax=ax)
        # Raw string: '\D' is not a valid escape sequence in a normal literal.
        ax.set_ylabel(r'MFCC-$\Delta^2$')
        fig.colorbar(mesh, ax=ax)
        fig.tight_layout()

        # Render the figure into an in-memory PNG.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)