Spaces:

thelou1s
/

yamnet_test

Sleeping

Luis

init2

97249f0 almost 2 years ago

3.32 kB

	import librosa.display
	import matplotlib
	import matplotlib.pyplot as plt
	import matplotlib.style as ms
	from PIL import Image

	import matplotlib.ticker as ticker
	from librosa.feature import melspectrogram
	from python.util.time_util import int_to_min_sec

	import librosa
	import librosa.display
	import numpy as np
	from io import BytesIO

	# Use the non-interactive Agg backend so figures can be rendered outside the main thread:
	# https://stackoverflow.com/questions/69924881/userwarning-starting-a-matplotlib-gui-outside-of-the-main-thread-will-likely-fa
	matplotlib.use('agg')

	# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
	# releases dropped the old names, so pick whichever one this installation provides.
	# If neither is listed, the default style is kept instead of failing at import time.
	for _style_name in ('seaborn-v0_8-muted', 'seaborn-muted'):
	    if _style_name in ms.available:
	        ms.use(_style_name)
	        break


	def update_ticks(x, pos, sample_rate=16000):
	    """Format a tick value given in samples as an elapsed-time label.

	    The first two parameters match the ``(x, pos)`` call made by
	    ``matplotlib.ticker.FuncFormatter``.

	    Args:
	        x: Tick value, interpreted as a sample index.
	        pos: Tick position supplied by matplotlib; not used.
	        sample_rate: Samples per second used to convert ``x`` to seconds.
	            Defaults to 16000.

	    Returns:
	        The result of ``int_to_min_sec`` for the elapsed seconds
	        (presumably a "minutes:seconds" label; see python.util.time_util).
	    """
	    return int_to_min_sec(x / sample_rate)


	def plt_line(y_points, sample_rate=16000):
	    """Plot a sequence of values as a line and return the plot as a PIL image.

	    Args:
	        y_points: Values to plot. They are plotted against their index, and
	            the x-axis ticks are labelled as elapsed time.
	        sample_rate: Samples per second used to convert x-axis sample
	            indices into time labels.

	    Returns:
	        A PIL image opened from the PNG rendering of the figure.
	    """
	    def format_tick(x, pos):
	        # Convert with the caller's rate so the labels are right for any sample rate.
	        return int_to_min_sec(x / sample_rate)

	    fig, ax = plt.subplots()
	    try:
	        ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_tick))
	        ax.plot(y_points)

	        # plot to image
	        buffer = BytesIO()
	        fig.savefig(buffer, format='png')
	    finally:
	        # pyplot keeps every figure registered until it is closed explicitly,
	        # so close it here to avoid accumulating figures across calls.
	        plt.close(fig)

	    buffer.seek(0)
	    return Image.open(buffer)


	# plt mfcc, https://www.cnblogs.com/LXP-Never/p/10918590.html
	def plt_mfcc(single_channel, sample_rate):
	    """Render a log-scaled mel spectrogram of a signal and return it as a PIL image.

	    Args:
	        single_channel: Audio samples passed to ``melspectrogram`` as ``y``
	            (assumed to be a mono signal, as the name suggests).
	        sample_rate: Sampling rate of ``single_channel``; used for the mel
	            analysis and for the time/frequency axes.

	    Returns:
	        A PIL image opened from the PNG rendering of the figure.
	    """
	    mel_spec = melspectrogram(y=single_channel, sr=sample_rate, n_fft=1024, hop_length=512, n_mels=128)
	    log_mel_spec = librosa.power_to_db(mel_spec)

	    fig = plt.figure()
	    try:
	        librosa.display.specshow(log_mel_spec, sr=sample_rate, x_axis='time', y_axis='mel')
	        plt.colorbar(format='%+2.0f dB')  # color bar on the right
	        plt.title('mfcc waveform')

	        # plot to image
	        buffer = BytesIO()
	        plt.savefig(buffer, format='png')
	    finally:
	        # pyplot keeps every figure registered until it is closed explicitly,
	        # so close it here to avoid accumulating figures across calls.
	        plt.close(fig)

	    buffer.seek(0)
	    return Image.open(buffer)


	# https://gist.github.com/stevemclaugh/80f192130852353ad53e6d8b6b275983
	def plt_mfcc2(wav_pathname, sample_rate):
	    """Plot the second-order MFCC deltas of an audio file and return a PIL image.

	    Args:
	        wav_pathname: Path of the audio file, passed to ``librosa.load``.
	        sample_rate: Rate the audio is loaded at. The rate reported back by
	            ``librosa.load`` is then used for the mel analysis and for the
	            time axis of the plot.

	    Returns:
	        A PIL image opened from the PNG rendering of the delta-delta MFCC plot.
	    """
	    # Load at the requested rate so the samples agree with the rate used for
	    # the analysis and the time axis below.
	    y, sr = librosa.load(wav_pathname, sr=sample_rate)

	    # Mel-scaled power (energy-squared) spectrogram
	    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)

	    # Convert to log scale (dB), using the peak power as reference.
	    # S holds power values, so power_to_db is the matching conversion.
	    log_S = librosa.power_to_db(S, ref=np.max)

	    # Extract the first 13 Mel-frequency cepstral coefficients (MFCCs),
	    # then take their second-order deltas.
	    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=13)
	    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

	    # Only the delta-delta plot is returned, so it is the only figure rendered.
	    fig = plt.figure()
	    try:
	        plt.subplot()
	        librosa.display.specshow(delta2_mfcc, sr=sr, x_axis='time')
	        # Raw string: '\D' is not a valid escape sequence in a normal string literal.
	        plt.ylabel(r'MFCC-$\Delta^2$')
	        plt.colorbar()

	        plt.tight_layout()

	        # plot to image
	        buffer = BytesIO()
	        plt.savefig(buffer, format='png')
	    finally:
	        # pyplot keeps every figure registered until it is closed explicitly,
	        # so close it here to avoid accumulating figures across calls.
	        plt.close(fig)

	    buffer.seek(0)
	    return Image.open(buffer)