# KevinGeng's picture
# support pitch contour and db plotting
# 5407cce
# raw
# history blame
# No virus
# 4.29 kB
### Kevin @ Laronix
from glob import glob
from pathlib import Path
import matplotlib
from matplotlib.transforms import Bbox
import matplotlib.pyplot as plt
import numpy as np
import pdb
import parselmouth
def draw_spectrogram(spectrogram, dynamic_range=80):
    """Draw a spectrogram as a dB-scaled colour mesh on the current axes.

    Args:
        spectrogram: Object exposing ``x_grid()``, ``y_grid()``, ``values``,
            ``ymin`` and ``ymax`` (presumably a ``parselmouth.Spectrogram``
            -- confirm against callers).
        dynamic_range: Width in dB of the displayed range; the colour scale
            starts ``dynamic_range`` dB below the largest value.
    """
    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
    # Floor the values before taking the log so that zero-valued cells do not
    # become -inf (divide-by-zero warning, and vmin = -inf for an all-zero
    # spectrogram). The floor lies far below any displayed range, so floored
    # cells are still clipped to the bottom of the colour scale.
    power = np.asarray(spectrogram.values, dtype=float)
    sg_db = 10 * np.log10(np.maximum(power, np.finfo(float).tiny))
    plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range)
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    # TODO add colorbar to spectrogram with limitation from -40 to 0
    # plt.xlabel("time [s]")
    plt.ylabel("frequency [Hz]")
def draw_intensity(intensity):
    """Plot an intensity curve and mark its extremes on the current axes.

    The curve is drawn as a thin white line on top of a thick red one. The
    minimum and maximum (NaNs ignored) are each projected with a dotted red
    line from ``intensity.xmax`` to the first time they occur, and labelled
    at ``intensity.xmax`` with their value rounded to one decimal.

    Args:
        intensity: Object exposing ``xs()``, ``values`` and ``xmax``
            (presumably a ``parselmouth.Intensity`` -- confirm against
            callers).
    """
    times = intensity.xs()
    levels = intensity.values.T
    right_edge = intensity.xmax

    # Thick red stroke first, thin white stroke on top of it.
    for width, colour in ((3, 'r'), (1, "w")):
        plt.plot(times, levels, linewidth=width, color=colour)

    extremes = (np.nanmin(levels), np.nanmax(levels))

    # Dotted guide from the right edge back to where each extreme first occurs.
    for level in extremes:
        hits = np.where(levels == level)
        first_time = times[hits[0]][0]
        plt.plot([right_edge, first_time], [level, level], linewidth=1, linestyle='dotted', color='red')

    # Numeric labels for both extremes at the right edge.
    for level in extremes:
        plt.text(right_edge, level, str(round(level, 1)), color='red')

    plt.grid(False)
    plt.ylim(0)
    plt.ylabel("intensity [dB]")
def draw_pitch(pitch):
    """Plot a pitch contour and highlight its minimum and maximum.

    Unvoiced frames (frequency 0) are replaced by NaN so they are not drawn.
    When at least one voiced frame exists, the lowest and highest values are
    marked with a dot, a dotted line back to ``pitch.xmin`` and a
    ``f0min``/``f0max`` label, and the y-range is padded by 50 around them
    (never going below 0).

    If there is no voiced frame at all, only the (empty) contour and the
    axis label are drawn, instead of failing with an ``IndexError`` while
    looking up the extremes.

    Args:
        pitch: Object exposing ``selected_array['frequency']``, ``xs()`` and
            ``xmin`` (presumably a ``parselmouth.Pitch`` -- confirm against
            callers).
    """
    # Work on a float copy so the array handed out by `pitch` is not mutated.
    pitch_values = np.array(pitch.selected_array['frequency'], dtype=float)
    pitch_values[pitch_values == 0] = np.nan
    times = pitch.xs()

    plt.plot(times, pitch_values, markersize=5, color='blue')
    # plt.plot(pitch.xs(), pitch_values, markersize=, color='white')

    # Nothing voiced (or no frames at all): there are no extremes to annotate.
    if np.all(np.isnan(pitch_values)):
        plt.grid(False)
        plt.ylabel("fundamental frequency [Hz]")
        return

    # First occurrence of each extreme, ignoring the NaN (unvoiced) frames.
    min_index = np.nanargmin(pitch_values)
    max_index = np.nanargmax(pitch_values)
    pitch_min, pitch_min_time = pitch_values[min_index], times[min_index]
    pitch_max, pitch_max_time = pitch_values[max_index], times[max_index]

    # project maximum and minimum pitch to y axis in dotted line
    plt.plot([pitch.xmin, pitch_min_time], [pitch_min, pitch_min], linewidth=1, linestyle='dotted', color='blue')
    plt.plot([pitch.xmin, pitch_max_time], [pitch_max, pitch_max], linewidth=1, linestyle='dotted', color='blue')

    # highlight pitch_min and pitch_max and label them next to the markers
    plt.scatter(pitch_min_time, pitch_min, color='blue', s=100)
    plt.scatter(pitch_max_time, pitch_max, color='blue', s=100)
    plt.text(pitch_min_time - 0.2, pitch_min - 30, "f0min = " + str(round(pitch_min, 1)), color='blue', fontsize=12)
    plt.text(pitch_max_time - 0.2, pitch_max + 30, "f0max = " + str(round(pitch_max, 1)), color='blue', fontsize=12)

    plt.grid(False)
    plt.ylim(max([0, pitch_min - 50]), pitch_max + 50)
    plt.ylabel("fundamental frequency [Hz]")
def draw_spec_db_pitch(wav, save_fig_path=None):
    """Build a two-panel figure: spectrogram + intensity, and pitch contour.

    The top panel shows the spectrogram of a pre-emphasized copy of the
    sound with the intensity curve overlaid on a twin y-axis; the bottom
    panel shows the pitch contour. Both panels span ``snd.xmin``..``snd.xmax``.

    Args:
        wav: Audio file location; converted with ``str()`` and handed to
            ``parselmouth.Sound``.
        save_fig_path: Optional destination for the rendered figure. When
            not ``None`` the figure is written there with ``fig.savefig``.

    Returns:
        The matplotlib figure that was created.
    """
    # get figure
    fig = plt.figure(figsize=(10, 5))

    # get pitch, intensity, spectrogram
    snd = parselmouth.Sound(str(wav))
    pitch = snd.to_pitch()
    intensity = snd.to_intensity()
    # Pre-emphasize a copy so pitch and intensity come from the raw sound.
    pre_emphasized_snd = snd.copy()
    pre_emphasized_snd.pre_emphasize()
    spectrogram = pre_emphasized_snd.to_spectrogram(window_length=0.1)

    # draw dB plot and spectrogram
    plt.subplot(2, 1, 1)
    draw_spectrogram(spectrogram)
    plt.twinx()
    draw_intensity(intensity)
    plt.xlim([snd.xmin, snd.xmax])

    # draw pitch contour
    plt.subplot(2, 1, 2)
    draw_pitch(pitch)
    plt.xlim([snd.xmin, snd.xmax])
    plt.xlabel("time [s]")

    # Lay out only once the axes exist; calling this on the empty figure
    # (as before) had nothing to arrange.
    fig.tight_layout()

    # Honour the previously ignored save_fig_path argument.
    if save_fig_path is not None:
        fig.savefig(save_fig_path)
    return fig
# f = draw_spec_db_pitch("./test.wav")
# plt.savefig("y.png")