"""Helpers that render audio waveforms and spectral features to PIL images."""
from io import BytesIO

import librosa
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.style as ms
import matplotlib.ticker as ticker
import numpy as np
from librosa.feature import melspectrogram
from PIL import Image

from python.util.time_util import int_to_min_sec

# Select the non-interactive Agg backend so figures can be rendered without a
# GUI, e.g. from worker threads. See:
# https://stackoverflow.com/questions/69924881/userwarning-starting-a-matplotlib-gui-outside-of-the-main-thread-will-likely-fa
matplotlib.use('agg')

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names; prefer the new name when it exists and
# fall back to the legacy one on older installations.
_SEABORN_MUTED = 'seaborn-v0_8-muted'
ms.use(_SEABORN_MUTED if _SEABORN_MUTED in ms.available else 'seaborn-muted')

_DEFAULT_SAMPLE_RATE = 16000


def _figure_to_image(fig):
    """Render *fig* into an in-memory PNG and return it as a PIL image.

    The caller remains responsible for closing the figure.
    """
    buffer = BytesIO()
    fig.savefig(buffer, format='png')
    buffer.seek(0)
    return Image.open(buffer)


def update_ticks(x, pos, sample_rate=_DEFAULT_SAMPLE_RATE):
    """Format a sample index as a time label for an axis tick.

    Args:
        x: Tick position, expressed as a sample index.
        pos: Tick ordinal supplied by ``FuncFormatter``; unused.
        sample_rate: Samples per second used to convert ``x`` to seconds.

    Returns:
        Whatever ``int_to_min_sec`` returns for the computed second offset
        (presumably a "min:sec" label; see that helper for the exact format).
    """
    which_second = x / sample_rate
    return int_to_min_sec(which_second)


def plt_line(y_points, sample_rate=_DEFAULT_SAMPLE_RATE):
    """Plot ``y_points`` as a line and return the plot as a PIL image.

    Args:
        y_points: Sequence of values to plot; the x axis is the sample index.
        sample_rate: Samples per second, used to label the x axis in time.

    Returns:
        A ``PIL.Image.Image`` holding the PNG rendering of the plot.
    """
    fig, ax = plt.subplots()
    try:
        # Bind the caller's sample rate so the tick labels match the data
        # instead of always assuming 16 kHz.
        ax.xaxis.set_major_formatter(ticker.FuncFormatter(
            lambda x, pos: update_ticks(x, pos, sample_rate)))
        ax.plot(y_points)
        return _figure_to_image(fig)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


# plt mfcc, https://www.cnblogs.com/LXP-Never/p/10918590.html
def plt_mfcc(single_channel, sample_rate):
    """Plot the log-mel spectrogram of a signal and return it as a PIL image.

    Args:
        single_channel: Audio samples passed to ``melspectrogram`` as ``y``.
        sample_rate: Sampling rate of ``single_channel`` in Hz.

    Returns:
        A ``PIL.Image.Image`` holding the PNG rendering of the spectrogram.
    """
    mel_spec = melspectrogram(y=single_channel, sr=sample_rate, n_fft=1024,
                              hop_length=512, n_mels=128)
    log_mel_spec = librosa.power_to_db(mel_spec)

    fig, ax = plt.subplots()
    try:
        mesh = librosa.display.specshow(log_mel_spec, sr=sample_rate,
                                        x_axis='time', y_axis='mel', ax=ax)
        # Color bar on the right-hand side.
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title('mfcc waveform')
        return _figure_to_image(fig)
    finally:
        plt.close(fig)


# https://gist.github.com/stevemclaugh/80f192130852353ad53e6d8b6b275983
def plt_mfcc2(wav_pathname, sample_rate):
    """Plot the second-order MFCC deltas of an audio file as a PIL image.

    Only the delta-delta plot is rendered: it is the sole figure this
    function saves and returns.

    Args:
        wav_pathname: Path of the audio file to load.
        sample_rate: Rate the audio is resampled to on load; it is passed to
            ``librosa.load`` as ``sr``.

    Returns:
        A ``PIL.Image.Image`` holding the PNG rendering of the plot.
    """
    # Load at the requested rate so the features and the time axis agree with
    # the audio actually analysed; use the rate librosa reports from here on.
    y, sr = librosa.load(wav_pathname, sr=sample_rate)

    # Mel-scaled power (energy-squared) spectrogram.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

    # Convert to log scale (dB), using the peak power as reference. The input
    # is a power spectrogram, so power_to_db is the matching conversion.
    log_S = librosa.power_to_db(S, ref=np.max)

    # First 13 Mel-frequency cepstral coefficients and their second delta.
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    fig, ax = plt.subplots()
    try:
        mesh = librosa.display.specshow(delta2_mfcc, sr=sr, x_axis='time',
                                        ax=ax)
        # Raw string: '\D' is not a valid escape sequence in a normal literal.
        ax.set_ylabel(r'MFCC-$\Delta^2$')
        fig.colorbar(mesh, ax=ax)
        fig.tight_layout()
        return _figure_to_image(fig)
    finally:
        plt.close(fig)