### Kevin @ Laronix
"""Plot a spectrogram with intensity overlay and a pitch contour for a sound."""
from glob import glob
from pathlib import Path
import pdb

import matplotlib
from matplotlib.transforms import Bbox
import matplotlib.pyplot as plt
import numpy as np
import parselmouth


def _time_of_first_match(times, values, target):
    """Return the entry of *times* at the first row of *values* equal to *target*.

    Raises:
        IndexError: If no element of *values* equals *target*.
    """
    rows = np.nonzero(values == target)[0]
    return times[rows[0]]


def draw_spectrogram(spectrogram, dynamic_range=80):
    """Draw *spectrogram* in dB on the current pyplot axes.

    Args:
        spectrogram: Object exposing ``x_grid()``, ``y_grid()``, ``values``,
            ``ymin`` and ``ymax``; ``draw_spec_db_pitch`` passes the result
            of ``Sound.to_spectrogram``.
        dynamic_range: Distance in dB below the loudest cell at which the
            lower limit (``vmin``) of the colour scale is placed.
    """
    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
    # Exact zeros (e.g. digital silence) become -inf; that is acceptable for
    # the colour mapping, so only the divide-by-zero warning is suppressed.
    with np.errstate(divide="ignore"):
        sg_db = 10 * np.log10(spectrogram.values)
    plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range)
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    # TODO add colorbar to spectrogram with limitation from -40 to 0
    # No x label here: draw_spec_db_pitch labels the time axis on the bottom
    # subplot only.
    plt.ylabel("frequency [Hz]")


def draw_intensity(intensity):
    """Draw the intensity contour on the current axes and mark its extremes.

    The minimum and maximum values are projected with dotted red lines from
    the frame where they first occur to ``intensity.xmax`` and labelled there
    with the value rounded to one decimal.

    Args:
        intensity: Object exposing ``xs()``, ``values`` and ``xmax``;
            ``draw_spec_db_pitch`` passes the result of ``Sound.to_intensity``.
    """
    times = intensity.xs()
    values = intensity.values.T

    # A thick red line with a thin white line on top gives a white contour
    # with a red outline.
    plt.plot(times, values, linewidth=3, color='r')
    plt.plot(times, values, linewidth=1, color="w")

    intensity_min = np.nanmin(values)
    intensity_max = np.nanmax(values)

    for level in (intensity_min, intensity_max):
        when = _time_of_first_match(times, values, level)
        # Dotted projection from the extreme to the right-hand edge.
        plt.plot([intensity.xmax, when], [level, level],
                 linewidth=1, linestyle='dotted', color='red')
        plt.text(intensity.xmax, level, str(round(level, 1)), color='red')

    plt.grid(False)
    plt.ylim(0)
    plt.ylabel("intensity [dB]")


def draw_pitch(pitch):
    """Draw the pitch contour on the current axes and mark f0 min and max.

    Frames whose selected frequency is 0 are treated as unvoiced and replaced
    by NaN so they are not drawn. When no voiced frame remains, only the
    (empty) contour and the axis label are drawn and the min/max annotations
    are skipped instead of failing.

    Args:
        pitch: Object exposing ``selected_array['frequency']``, ``xs()`` and
            ``xmin``; ``draw_spec_db_pitch`` passes the result of
            ``Sound.to_pitch``.
    """
    # Extract selected pitch contour, and
    # replace unvoiced samples by NaN to not plot
    pitch_values = pitch.selected_array['frequency']
    pitch_values[pitch_values == 0] = np.nan
    times = pitch.xs()

    plt.plot(times, pitch_values, markersize=5, color='blue')
    plt.grid(False)
    plt.ylabel("fundamental frequency [Hz]")

    # Nothing voiced (or no frames at all): there is no min/max to annotate,
    # and NaN limits cannot be used for the y axis.
    if np.isnan(pitch_values).all():
        return

    pitch_min = np.nanmin(pitch_values)
    pitch_max = np.nanmax(pitch_values)

    annotations = (
        ("f0min = ", pitch_min, -30),  # label below the minimum
        ("f0max = ", pitch_max, 30),   # label above the maximum
    )
    for label, level, text_offset in annotations:
        when = _time_of_first_match(times, pitch_values, level)
        # Dotted projection from the extreme to the left-hand edge.
        plt.plot([pitch.xmin, when], [level, level],
                 linewidth=1, linestyle='dotted', color='blue')
        # Highlight the extreme itself.
        plt.scatter(when, level, color='blue', s=100)
        plt.text(when - 0.2, level + text_offset,
                 label + str(round(level, 1)), color='blue', fontsize=12)

    plt.ylim(max(0, pitch_min - 50), pitch_max + 50)


def draw_spec_db_pitch(wav, save_fig_path=None):
    """Build a two-panel figure for the sound file *wav*.

    The top panel shows the spectrogram of the pre-emphasized sound with the
    intensity contour on a twin y axis; the bottom panel shows the pitch
    contour. Both panels share the sound's time range.

    Args:
        wav: Path of the sound file; converted with ``str()`` before being
            handed to ``parselmouth.Sound``.
        save_fig_path: When not ``None``, the figure is also written to this
            path with ``Figure.savefig``.

    Returns:
        The matplotlib figure. It is not closed here, so it stays registered
        with pyplot until the caller closes it.
    """
    # Load and analyse the sound first, so an unreadable file does not leave
    # an empty figure behind.
    snd = parselmouth.Sound(str(wav))
    pitch = snd.to_pitch()
    intensity = snd.to_intensity()
    pre_emphasized_snd = snd.copy()
    pre_emphasized_snd.pre_emphasize()
    spectrogram = pre_emphasized_snd.to_spectrogram(window_length=0.1)

    fig = plt.figure(figsize=(10, 5))

    # draw dB plot and spectrogram
    plt.subplot(2, 1, 1)
    draw_spectrogram(spectrogram)
    plt.twinx()
    draw_intensity(intensity)
    plt.xlim([snd.xmin, snd.xmax])

    # draw pitch contour
    plt.subplot(2, 1, 2)
    draw_pitch(pitch)
    plt.xlim([snd.xmin, snd.xmax])
    plt.xlabel("time [s]")

    # The layout can only be tightened once the axes exist.
    fig.tight_layout()

    if save_fig_path is not None:
        fig.savefig(save_fig_path)
    return fig


# Example usage:
# f = draw_spec_db_pitch("./test.wav")
# plt.savefig("y.png")