yamnet_test / python /util /plt_util.py
Luis
add mp4_to_mp3.py
c0a2456
raw
history blame
3.34 kB
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.style as ms
from PIL import Image
import matplotlib.ticker as ticker
from librosa.feature import melspectrogram
from python.util.time_util import int_to_min_sec
import librosa
import librosa.display
import numpy as np
from io import BytesIO
# Render with the non-interactive Agg backend so figures can be produced
# outside the main thread without starting a GUI:
# https://stackoverflow.com/questions/69924881/userwarning-starting-a-matplotlib-gui-outside-of-the-main-thread-will-likely-fa
matplotlib.use('agg')
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names. Pick whichever spelling this
# installation ships; if neither is listed, fall through to the original name
# so the failure mode is the same as before.
_MUTED_STYLE = next(
    (name for name in ('seaborn-v0_8-muted', 'seaborn-muted') if name in ms.available),
    'seaborn-muted',
)
ms.use(_MUTED_STYLE)
def update_ticks(x, pos, sample_rate=16000):
    """Format a sample-index tick value as a time label.

    The ``(x, pos)`` leading parameters match the callback signature used by
    ``matplotlib.ticker.FuncFormatter``, so the function can be passed to it
    directly.

    Args:
        x: Tick position, interpreted as a sample index.
        pos: Tick index supplied by matplotlib; unused.
        sample_rate: Samples per second used to convert ``x`` to seconds.
            Defaults to 16000, the value that used to be hard-coded.

    Returns:
        The result of ``int_to_min_sec`` for the tick's position in seconds
        (presumably a "min:sec" label -- confirm against ``time_util``).
    """
    which_second = x / sample_rate
    return int_to_min_sec(which_second)
def plt_line(y_points, sample_rate=16000):
    """Plot a waveform as a line chart and return it as a PIL image.

    The x axis is indexed by sample; tick labels are converted to time using
    ``sample_rate`` and ``int_to_min_sec``.

    Args:
        y_points: Sequence of sample values to plot.
        sample_rate: Samples per second of ``y_points``. Previously this
            argument was ignored and 16000 was always used for the labels.

    Returns:
        A ``PIL.Image`` opened from an in-memory PNG rendering of the plot.

    Raises:
        ValueError: If ``sample_rate`` is missing or not positive, since the
            tick labels could not be computed.
    """
    if not sample_rate or sample_rate <= 0:
        raise ValueError(f'sample_rate must be positive, got {sample_rate!r}')

    def _format_tick(x, pos):
        # Same conversion as update_ticks, but honouring this call's rate.
        return int_to_min_sec(x / sample_rate)

    fig, ax = plt.subplots()
    try:
        ax.xaxis.set_major_formatter(ticker.FuncFormatter(_format_tick))
        ax.plot(y_points)
        ax.set_title('Waveform')
        # plot to image
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer)
# plt mfcc, https://www.cnblogs.com/LXP-Never/p/10918590.html
def plt_mfcc(single_channel, sample_rate):
    """Render a log-scaled mel spectrogram of a signal as a PIL image.

    Note that, despite the function name and the 'MFCC' plot title, the data
    drawn is the dB-scaled mel spectrogram; no cepstral coefficients are
    computed here.

    Args:
        single_channel: Audio samples passed to ``melspectrogram`` as ``y``
            (presumably a mono 1-D array -- confirm against callers).
        sample_rate: Sample rate of ``single_channel`` in Hz.

    Returns:
        A ``PIL.Image`` opened from an in-memory PNG rendering of the plot.
    """
    mel_spec = melspectrogram(y=single_channel, sr=sample_rate, n_fft=1024, hop_length=512, n_mels=128)
    log_mel_spec = librosa.power_to_db(mel_spec)

    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(log_mel_spec, sr=sample_rate, x_axis='time', y_axis='mel', ax=ax)
        # A color bar on the right-hand side is intentionally not drawn.
        ax.set_title('MFCC')
        # plot to image
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer)
# https://gist.github.com/stevemclaugh/80f192130852353ad53e6d8b6b275983
def plt_mfcc2(wav_pathname, sample_rate):
    """Plot the second-order MFCC deltas of an audio file as a PIL image.

    The file is loaded at ``sample_rate``, converted to a 128-band mel power
    spectrogram, log-scaled, reduced to 13 MFCCs, and the second-order delta
    of those coefficients is drawn with a color bar.

    Args:
        wav_pathname: Path of the audio file to load with ``librosa.load``.
        sample_rate: Sample rate in Hz to load the audio at; also used for
            the spectrogram and the time axis.

    Returns:
        A ``PIL.Image`` opened from an in-memory PNG rendering of the plot.
    """
    # Load at the requested rate. The previous code let librosa resample to
    # its own default while telling every later step the audio was at
    # sample_rate, which skewed both the mel bands and the time axis.
    y, _ = librosa.load(wav_pathname, sr=sample_rate)

    # Mel-scaled power (energy-squared) spectrogram.
    S = librosa.feature.melspectrogram(y=y, sr=sample_rate, n_mels=128)
    # S holds power values, so the dB conversion must be power_to_db;
    # amplitude_to_db would double every dB value.
    log_S = librosa.power_to_db(S)

    # First 13 Mel-frequency cepstral coefficients and their second delta.
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    # Only this figure is rendered; the intermediate mel-spectrogram figure
    # that used to be created was never saved or returned, so it is gone.
    fig, ax = plt.subplots()
    try:
        mesh = librosa.display.specshow(delta2_mfcc, sr=sample_rate, x_axis='time', ax=ax)
        # Raw string: '\D' is not a valid escape sequence in a normal literal.
        ax.set_ylabel(r'MFCC-$\Delta^2$')
        fig.colorbar(mesh, ax=ax)
        fig.tight_layout()
        # plot to image
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak figures.
        plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer)