Spaces:
Running
Running
| import librosa.display | |
| import matplotlib | |
| import matplotlib.pyplot as plt | |
| import matplotlib.style as ms | |
| from PIL import Image | |
| import matplotlib.ticker as ticker | |
| from librosa.feature import melspectrogram | |
| from python.util.time_util import int_to_min_sec | |
| import librosa | |
| import librosa.display | |
| import numpy as np | |
| from io import BytesIO | |
# https://stackoverflow.com/questions/69924881/userwarning-starting-a-matplotlib-gui-outside-of-the-main-thread-will-likely-fa
# Use the non-interactive Agg backend: figures here are only rendered to
# in-memory PNGs, never shown in a GUI window.
matplotlib.use('agg')
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old aliases. Pick whichever spelling the
# installed version provides so importing this module does not fail.
for _style_name in ('seaborn-v0_8-muted', 'seaborn-muted'):
    if _style_name in ms.available:
        ms.use(_style_name)
        break
def update_ticks(x, pos, sample_rate=16000):
    """Convert a tick position given in samples into a time label.

    The first two parameters follow the callback signature expected by
    ``matplotlib.ticker.FuncFormatter``.

    Args:
        x: Tick position on the axis, expressed as a sample index.
        pos: Tick index supplied by matplotlib; not used.
        sample_rate: Samples per second used to convert ``x`` to seconds.
            Defaults to 16000, the value that was previously hard-coded.

    Returns:
        Whatever ``int_to_min_sec`` returns for the tick's position in
        seconds (presumably a "minutes:seconds" label -- confirm against
        ``python.util.time_util``).
    """
    which_second = x / sample_rate
    return int_to_min_sec(which_second)
def plt_line(y_points, sample_rate=16000):
    """Plot a sequence of samples as a line chart and return it as an image.

    The x axis is indexed by sample number; tick labels are converted to
    time using ``sample_rate``.

    Args:
        y_points: Values to plot, one per sample.
        sample_rate: Samples per second of ``y_points``, used to label the
            x axis. Defaults to 16000.

    Returns:
        A ``PIL.Image.Image`` opened from the PNG rendering of the plot.
    """
    def _format_tick(x, pos):
        # Honour the caller's sample rate instead of a fixed 16 kHz.
        return int_to_min_sec(x / sample_rate)

    fig, ax = plt.subplots()
    try:
        ax.xaxis.set_major_formatter(ticker.FuncFormatter(_format_tick))
        ax.plot(y_points)

        # Render the figure into an in-memory PNG.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)
# plt mfcc, https://www.cnblogs.com/LXP-Never/p/10918590.html
def plt_mfcc(single_channel, sample_rate):
    """Render a log-scaled mel spectrogram of one audio channel as an image.

    Despite the function name and plot title, the picture shows the mel
    spectrogram in dB; no cepstral coefficients are computed here.

    Args:
        single_channel: Audio samples of a single channel, passed to
            ``librosa.feature.melspectrogram`` as ``y``.
        sample_rate: Sampling rate of ``single_channel`` in Hz.

    Returns:
        A ``PIL.Image.Image`` opened from the PNG rendering of the plot.
    """
    hop_length = 512
    mel_spec = melspectrogram(y=single_channel, sr=sample_rate, n_fft=1024,
                              hop_length=hop_length, n_mels=128)
    log_mel_spec = librosa.power_to_db(mel_spec)

    fig = plt.figure()
    try:
        # Pass the hop length explicitly so the time axis always matches the
        # hop used for the analysis above.
        librosa.display.specshow(log_mel_spec, sr=sample_rate,
                                 hop_length=hop_length,
                                 x_axis='time', y_axis='mel')
        plt.colorbar(format='%+2.0f dB')  # color bar on the right
        plt.title('mfcc waveform')

        # Render the figure into an in-memory PNG.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)
# https://gist.github.com/stevemclaugh/80f192130852353ad53e6d8b6b275983
def plt_mfcc2(wav_pathname, sample_rate):
    """Render the second-order MFCC deltas of an audio file as an image.

    The file is loaded, converted to a 128-band mel power spectrogram in dB,
    reduced to 13 MFCCs, and the second-order delta of those coefficients is
    drawn over a time axis.

    Args:
        wav_pathname: Path of the audio file to load with ``librosa.load``.
        sample_rate: Sampling rate in Hz that the audio is resampled to on
            load and that is used for the analysis and the time axis.

    Returns:
        A ``PIL.Image.Image`` opened from the PNG rendering of the plot.
    """
    # Load at the requested rate. librosa.load otherwise resamples to its own
    # default, which would disagree with the rate used for the mel filter
    # bank and the time axis below.
    y, sr = librosa.load(wav_pathname, sr=sample_rate)

    # Mel-scaled power (energy-squared) spectrogram.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

    # Convert to log scale (dB) with the peak power as reference. S is a
    # power spectrogram, so power_to_db (not amplitude_to_db) is the matching
    # conversion.
    log_S = librosa.power_to_db(S, ref=np.max)

    # First 13 Mel-frequency cepstral coefficients and their second delta.
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    fig = plt.figure()
    try:
        plt.subplot()
        librosa.display.specshow(delta2_mfcc, sr=sr, x_axis='time')
        # Raw string: "\D" is not a valid escape sequence in a normal literal.
        plt.ylabel(r'MFCC-$\Delta^2$')
        plt.colorbar()
        plt.tight_layout()

        # Render the figure into an in-memory PNG.
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)