File size: 3,320 Bytes
97249f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import librosa.display
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.style as ms
from PIL import Image

import matplotlib.ticker as ticker
from librosa.feature import melspectrogram
from python.util.time_util import int_to_min_sec

import librosa
import librosa.display
import numpy as np
from io import BytesIO

# Use the non-interactive Agg backend: every figure in this module is only
# rendered to an in-memory PNG, and starting a GUI backend outside of the main
# thread triggers a matplotlib UserWarning and is likely to fail.
# https://stackoverflow.com/questions/69924881/userwarning-starting-a-matplotlib-gui-outside-of-the-main-thread-will-likely-fa
matplotlib.use('agg')

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever spelling this install
# provides. If neither is listed, fall back to the legacy name so that a
# missing style still raises exactly as it did before.
ms.use(next(
    (name for name in ('seaborn-v0_8-muted', 'seaborn-muted') if name in ms.available),
    'seaborn-muted',
))


def update_ticks(x, pos, sample_rate=16000):
    """Format an x-axis tick given in samples as a time label.

    The ``(x, pos)`` signature matches what ``matplotlib.ticker.FuncFormatter``
    passes to its callback, so this function can be handed to it directly.

    Args:
        x: Tick position on the axis, expressed as a sample index.
        pos: Tick index supplied by matplotlib; unused.
        sample_rate: Number of samples per second used to convert ``x`` to
            seconds. Defaults to 16000.

    Returns:
        Whatever ``int_to_min_sec`` returns for the tick's position in seconds
        (presumably a "minutes:seconds" label -- confirm against the helper).
    """
    which_second = x / sample_rate
    return int_to_min_sec(which_second)


def plt_line(y_points, sample_rate=16000):
    """Plot a sequence of samples as a line chart and return it as an image.

    Args:
        y_points: Values to plot; the x axis is the index of each value.
        sample_rate: Samples per second, used to turn x tick positions
            (sample indices) into time labels. Defaults to 16000.

    Returns:
        A PIL ``Image`` opened on an in-memory PNG rendering of the plot.
    """
    fig, ax = plt.subplots()
    try:
        # Label ticks with time instead of raw sample indices, honouring the
        # caller's sample rate rather than a fixed 16 kHz.
        ax.xaxis.set_major_formatter(
            ticker.FuncFormatter(lambda x, pos: int_to_min_sec(x / sample_rate))
        )
        ax.plot(y_points)

        # plot to image
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)


# plt mfcc, https://www.cnblogs.com/LXP-Never/p/10918590.html
def plt_mfcc(single_channel, sample_rate):
    """Render the log-scaled mel spectrogram of a signal as an image.

    Args:
        single_channel: Audio samples passed to ``melspectrogram`` as ``y``
            (assumed to be a mono waveform -- confirm against callers).
        sample_rate: Sampling rate of ``single_channel`` in Hz.

    Returns:
        A PIL ``Image`` opened on an in-memory PNG rendering of the plot.
    """
    hop_length = 512
    mel_spec = melspectrogram(
        y=single_channel, sr=sample_rate, n_fft=1024, hop_length=hop_length, n_mels=128
    )
    log_mel_spec = librosa.power_to_db(mel_spec)

    fig = plt.figure()
    try:
        # Pass the same hop length used for the analysis so the time axis
        # stays consistent with the spectrogram frames.
        librosa.display.specshow(
            log_mel_spec, sr=sample_rate, hop_length=hop_length, x_axis='time', y_axis='mel'
        )
        plt.colorbar(format='%+2.0f dB')  # color bar on the right-hand side
        plt.title('mfcc waveform')

        # plot to image
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)


# https://gist.github.com/stevemclaugh/80f192130852353ad53e6d8b6b275983
def plt_mfcc2(wav_pathname, sample_rate):
    """Render the second-order MFCC deltas of an audio file as an image.

    The file is loaded, converted to a 128-band mel power spectrogram in dB,
    reduced to 13 MFCCs, and the second-order delta of those coefficients is
    plotted against time.

    Args:
        wav_pathname: Path of the audio file to load with ``librosa.load``.
        sample_rate: Rate in Hz to load the audio at. ``None`` keeps the
            file's native rate.

    Returns:
        A PIL ``Image`` opened on an in-memory PNG rendering of the plot.
    """
    # Load at the requested rate so that the analysis and the time axis use
    # the rate the samples actually have (librosa resamples to 22050 Hz when
    # no rate is given).
    y, sr = librosa.load(wav_pathname, sr=sample_rate)

    # Mel-scaled power (energy-squared) spectrogram.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

    # Convert to log scale (dB) using the peak power as reference. S is a
    # power spectrogram, so power_to_db (10*log10) is the matching conversion.
    log_S = librosa.power_to_db(S, ref=np.max)

    # First 13 Mel-frequency cepstral coefficients and their second delta.
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    fig = plt.figure()
    try:
        plt.subplot()
        librosa.display.specshow(delta2_mfcc, sr=sr, x_axis='time')
        # Raw string: '\D' is not a valid escape sequence in a normal literal.
        plt.ylabel(r'MFCC-$\Delta^2$')
        plt.colorbar()

        plt.tight_layout()

        # plot to image
        buffer = BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)

    buffer.seek(0)
    return Image.open(buffer)