File size: 4,294 Bytes
5407cce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
### Kevin @ Laronix

from glob import glob
from pathlib import Path
import matplotlib
from matplotlib.transforms import Bbox  
import matplotlib.pyplot as plt

import numpy as np
import pdb
import parselmouth

def draw_spectrogram(spectrogram, dynamic_range=80):
    """Render a spectrogram in decibels on the current matplotlib axes.

    Args:
        spectrogram: Object exposing ``x_grid()``, ``y_grid()``, ``values``,
            ``ymin`` and ``ymax`` (presumably a ``parselmouth.Spectrogram``
            -- confirm against the caller).
        dynamic_range: Width, in dB, of the colour range below the loudest
            cell; everything quieter is clamped to the bottom colour.
    """
    time_edges = spectrogram.x_grid()
    freq_edges = spectrogram.y_grid()

    # Convert the raw values to decibels.
    power_db = np.log10(spectrogram.values) * 10
    # Colour scale starts ``dynamic_range`` dB below the peak.
    floor_db = power_db.max() - dynamic_range

    plt.pcolormesh(time_edges, freq_edges, power_db, vmin=floor_db)
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    # TODO add colorbar to spectrogram with limitation from -40 to 0

    # The x label is left off on purpose here; only the y axis is labelled.
    plt.ylabel("frequency [Hz]")

def draw_intensity(intensity):
    """Plot an intensity contour and mark its minimum and maximum.

    The contour is drawn as a thin white line over a thicker red one. For
    each extreme, a dotted red line is drawn from ``intensity.xmax`` back to
    the time at which the extreme first occurs, and the value (rounded to one
    decimal) is written at ``intensity.xmax``.

    Args:
        intensity: Object exposing ``xs()``, ``values`` and ``xmax``
            (presumably a ``parselmouth.Intensity`` -- confirm against the
            caller).
    """
    times = intensity.xs()
    levels = intensity.values.T
    right_edge = intensity.xmax

    # Thick red stroke first, thin white stroke on top of it.
    for width, colour in ((3, 'r'), (1, "w")):
        plt.plot(times, levels, linewidth=width, color=colour)

    # NaN-aware extremes and the first time stamp at which each one occurs.
    extremes = (np.nanmin(levels), np.nanmax(levels))
    moments = [times[np.where(levels == level)[0]][0] for level in extremes]

    # Dotted guide from the right edge back to where the extreme occurs.
    for level, moment in zip(extremes, moments):
        plt.plot(
            [right_edge, moment],
            [level, level],
            linewidth=1,
            linestyle='dotted',
            color='red',
        )

    # Numeric value of each extreme, written at the right edge.
    for level in extremes:
        plt.text(right_edge, level, str(round(level, 1)), color='red')

    plt.grid(False)
    plt.ylim(0)  # pin the lower y limit at 0
    plt.ylabel("intensity [dB]")

def draw_pitch(pitch):
    """Plot the pitch contour and annotate its lowest and highest value.

    Unvoiced frames (frequency 0) are replaced by NaN so they are not drawn.
    When at least one voiced frame exists, the minimum and maximum are
    marked with a dot, a dotted line from ``pitch.xmin`` and a text label,
    and the y range is set to 50 Hz around the extremes (never below 0).
    When there is no voiced frame at all, only the (empty) contour and the
    axis label are drawn instead of raising ``IndexError``.

    Args:
        pitch: Object exposing ``xs()``, ``selected_array['frequency']`` and
            ``xmin`` (presumably a ``parselmouth.Pitch`` -- confirm against
            the caller).
    """
    times = pitch.xs()
    # Extract selected pitch contour, and
    # replace unvoiced samples by NaN to not plot
    pitch_values = pitch.selected_array['frequency']
    pitch_values[pitch_values == 0] = np.nan
    plt.plot(times, pitch_values, markersize=5, color='blue')

    # Annotating extremes only makes sense if something is voiced; without
    # this guard an all-unvoiced recording has no min/max to locate.
    if (~np.isnan(pitch_values)).any():
        # nanargmin/nanargmax return the first index of the extreme.
        min_index = np.nanargmin(pitch_values)
        max_index = np.nanargmax(pitch_values)
        pitch_min = pitch_values[min_index]
        pitch_max = pitch_values[max_index]
        pitch_min_time = times[min_index]
        pitch_max_time = times[max_index]

        # project maximum and minimum pitch to the y axis in dotted lines
        plt.plot([pitch.xmin, pitch_min_time], [pitch_min, pitch_min],
                 linewidth=1, linestyle='dotted', color='blue')
        plt.plot([pitch.xmin, pitch_max_time], [pitch_max, pitch_max],
                 linewidth=1, linestyle='dotted', color='blue')

        # highlight pitch_min and pitch_max
        plt.scatter(pitch_min_time, pitch_min, color='blue', s=100)
        plt.scatter(pitch_max_time, pitch_max, color='blue', s=100)

        # label the extremes, offset slightly so the text clears the marker
        plt.text(pitch_min_time - 0.2, pitch_min - 30,
                 "f0min = " + str(round(pitch_min, 1)),
                 color='blue', fontsize=12)
        plt.text(pitch_max_time - 0.2, pitch_max + 30,
                 "f0max = " + str(round(pitch_max, 1)),
                 color='blue', fontsize=12)

        # leave 50 Hz of headroom on both sides, but never go below 0 Hz
        plt.ylim(max([0, pitch_min - 50]), pitch_max + 50)

    plt.grid(False)
    plt.ylabel("fundamental frequency [Hz]")

def draw_spec_db_pitch(wav, save_fig_path=None):
    """Build a two-panel figure: spectrogram + intensity, and pitch contour.

    The top panel shows the spectrogram of the pre-emphasized sound with the
    intensity contour overlaid on a twin y axis; the bottom panel shows the
    pitch contour. Both panels share the sound's time range on the x axis.

    Args:
        wav: Path of the audio file; converted with ``str()`` before being
            handed to ``parselmouth.Sound``.
        save_fig_path: Optional destination for the rendered figure. When
            not ``None`` the figure is written there with ``fig.savefig``.

    Returns:
        The matplotlib figure. It stays registered with pyplot; closing it
        is left to the caller.
    """
    # Analyse the audio first so that a load/analysis failure does not leave
    # an empty figure behind in pyplot.
    snd = parselmouth.Sound(str(wav))
    pitch = snd.to_pitch()
    intensity = snd.to_intensity()
    # Pre-emphasis is applied to a copy so pitch/intensity use the raw sound.
    pre_emphasized_snd = snd.copy()
    pre_emphasized_snd.pre_emphasize()
    spectrogram = pre_emphasized_snd.to_spectrogram(window_length=0.1)

    fig = plt.figure(figsize=(10, 5))

    # draw dB plot and spectrogram
    plt.subplot(2, 1, 1)
    draw_spectrogram(spectrogram)
    plt.twinx()
    draw_intensity(intensity)
    plt.xlim([snd.xmin, snd.xmax])

    # draw pitch contour
    plt.subplot(2, 1, 2)
    draw_pitch(pitch)
    plt.xlim([snd.xmin, snd.xmax])
    plt.xlabel("time [s]")

    # tight_layout is a one-shot adjustment, so it must run once the axes
    # and labels exist; calling it on the empty figure has no effect.
    fig.tight_layout()

    if save_fig_path is not None:
        fig.savefig(save_fig_path)

    return fig

# f = draw_spec_db_pitch("./test.wav")
# plt.savefig("y.png")