ChatGPT-with-Voice-Cloning-in-Chinese

Build error

App Files Files Community

ChatGPT-with-Voice-Cloning-in-Chinese / utils /f0_utils.py

Kevin676

Duplicate from lewiswu1209/MockingBird

4817bcc over 1 year ago

raw

history blame

3.61 kB

	import logging
	import numpy as np
	import pyworld
	from scipy.interpolate import interp1d
	from scipy.signal import firwin, get_window, lfilter

	def compute_mean_std(lf0):
	    """Return the mean and standard deviation of the non-zero entries of lf0.

	    Zero entries are excluded from both statistics (elsewhere in this module
	    a zero f0 value marks an unvoiced frame).

	    Args:
	        lf0 (ndarray): log-f0 sequence in which zeros are to be ignored

	    Return:
	        (tuple): ``(mean, std)`` computed over the non-zero entries only
	    """
	    voiced_values = lf0[np.nonzero(lf0)]
	    return np.mean(voiced_values), np.std(voiced_values)


	def compute_f0(wav, sr=16000, frame_period=10.0):
	    """Estimate the f0 contour of a waveform with pyworld's harvest.

	    Args:
	        wav (ndarray): waveform samples; cast to float64 before analysis
	        sr (int): sampling rate handed to harvest
	        frame_period (float): frame period handed to harvest

	    Return:
	        (ndarray): f0 sequence as float32; the search range is fixed to
	            80-600 (f0_floor / f0_ceil)
	    """
	    samples = wav.astype(np.float64)
	    harvest_out = pyworld.harvest(
	        samples,
	        sr,
	        frame_period=frame_period,
	        f0_floor=80.0,
	        f0_ceil=600.0,
	    )
	    # harvest returns a pair; only the first element (f0) is used here.
	    f0 = harvest_out[0]
	    return f0.astype(np.float32)

	def f02lf0(f0):
	    """Convert an f0 sequence to log-f0, leaving zero entries at zero.

	    Args:
	        f0 (ndarray): f0 sequence; not modified

	    Return:
	        (ndarray): copy of f0 where every non-zero entry is replaced by its
	            natural logarithm
	    """
	    voiced = f0 != 0
	    log_f0 = f0.copy()
	    # Take the log only where f0 is non-zero so log(0) is never evaluated.
	    log_f0[voiced] = np.log(f0[voiced])
	    return log_f0

	def get_converted_lf0uv(
	    wav,
	    lf0_mean_trg,
	    lf0_std_trg,
	    convert=True,
	):
	    """Extract continuous log-f0 plus a voiced/unvoiced flag from a waveform.

	    With ``convert=True`` the source log-f0 is first normalised by its own
	    mean/std and rescaled to the target statistics before the continuous
	    contour is built. With ``convert=False`` the source f0 is used as is.

	    Args:
	        wav (ndarray): source waveform, passed to compute_f0
	        lf0_mean_trg (float): target log-f0 mean
	        lf0_std_trg (float): target log-f0 standard deviation
	        convert (bool): whether to apply the mean/std transformation

	    Return:
	        (ndarray): array with two columns, continuous log-f0 in column 0 and
	            the uv flag in column 1
	    """
	    f0 = compute_f0(wav)

	    if convert:
	        lf0_src = f02lf0(f0)
	        src_mean, src_std = compute_mean_std(lf0_src)

	        # Frames with a positive log-f0 are the ones that get transformed;
	        # every other frame keeps its original value.
	        voiced = lf0_src > 0.0
	        lf0_vc = lf0_src.copy()
	        lf0_vc[voiced] = (lf0_src[voiced] - src_mean) / src_std * lf0_std_trg + lf0_mean_trg

	        # Back to the linear domain for the transformed frames.
	        f0 = lf0_vc.copy()
	        f0[voiced] = np.exp(lf0_vc[voiced])

	    uv, cont_lf0 = get_cont_lf0(f0)
	    return np.stack([cont_lf0, uv], axis=1)

	def low_pass_filter(x, fs, cutoff=70, padding=True):
	    """FUNCTION TO APPLY LOW PASS FILTER

	    The signal is edge-padded, filtered with a 255-tap FIR low-pass filter
	    and then trimmed so the output has the same length as the input and the
	    filter delay is compensated.

	    Args:
	        x (ndarray): Waveform sequence
	        fs (int): Sampling frequency
	        cutoff (float): Cutoff frequency of low pass filter
	        padding (bool): Accepted for interface compatibility; currently
	            ignored, the input is always edge-padded

	    Return:
	        (ndarray): Low pass filtered waveform sequence
	    """
	    # True division: floor division would under-estimate the Nyquist
	    # frequency for odd or non-integer fs and distort the normalised cutoff.
	    nyquist = fs / 2
	    norm_cutoff = cutoff / nyquist

	    # low-pass FIR design
	    numtaps = 255
	    fil = firwin(numtaps, norm_cutoff)

	    # Pad with edge values so the filter start-up transient falls in the
	    # padding rather than in the signal itself.
	    x_pad = np.pad(x, (numtaps, numtaps), mode="edge")
	    lpf_x = lfilter(fil, 1, x_pad)

	    # Skip the leading padding plus the filter delay of numtaps // 2 samples,
	    # then keep exactly len(x) samples.
	    start = numtaps + numtaps // 2
	    lpf_x = lpf_x[start:start + len(x)]

	    return lpf_x


	def convert_continuos_f0(f0):
	    """CONVERT F0 TO CONTINUOUS F0

	    Zero (unvoiced) frames are filled in: frames before the first non-zero
	    value take that value, frames after the last non-zero value take that
	    value, and interior zeros are linearly interpolated between their
	    non-zero neighbours. The input array is never modified.

	    Args:
	        f0 (ndarray): original f0 sequence with the shape (T)

	    Return:
	        (tuple): ``(uv, cont_f0)`` where uv is a float32 array that is 1 for
	            non-zero frames and 0 otherwise, and cont_f0 is the continuous
	            f0 with the shape (T). If every frame is zero a warning is
	            logged and f0 itself is returned as cont_f0.
	    """
	    # get uv information as binary
	    uv = np.float32(f0 != 0)

	    # locate the voiced (non-zero) frames
	    voiced_idx = np.nonzero(f0)[0]
	    if voiced_idx.size == 0:
	        logging.warning("all of the f0 values are 0.")
	        return uv, f0
	    start_idx, end_idx = voiced_idx[0], voiced_idx[-1]

	    # Pad the start and end on a copy so the caller's array stays untouched.
	    padded = f0.copy()
	    padded[:start_idx] = f0[start_idx]
	    padded[end_idx:] = f0[end_idx]

	    # get non-zero frame index (now includes the first and last frame)
	    nz_frames = np.nonzero(padded)[0]

	    # A single-frame sequence has only one knot, so there is nothing to
	    # interpolate between; return it directly as float64.
	    if nz_frames.size < 2:
	        return uv, padded.astype(np.float64)

	    # perform linear interpolation
	    f = interp1d(nz_frames, padded[nz_frames])
	    cont_f0 = f(np.arange(0, padded.shape[0]))

	    return uv, cont_f0


	def get_cont_lf0(f0, frame_period=10.0, lpf=False):
	    """Build a continuous log-f0 contour and the matching uv flags.

	    Args:
	        f0 (ndarray): f0 sequence, passed to convert_continuos_f0
	        frame_period (float): frame period; only used to derive the sampling
	            frequency for the optional low-pass filter
	        lpf (bool): if True, low-pass filter the continuous f0 (cutoff=20)
	            before taking the logarithm

	    Return:
	        (tuple): ``(uv, cont_lf0)``, the uv flags from convert_continuos_f0
	            and the log of the (optionally filtered) continuous f0
	    """
	    uv, contour = convert_continuos_f0(f0)

	    if lpf:
	        contour = low_pass_filter(contour, int(1.0 / (frame_period * 0.001)), cutoff=20)
	        # Filtered contour: log is applied to every non-zero sample.
	        log_mask = contour != 0
	    else:
	        # Unfiltered contour: log is applied to strictly positive samples.
	        log_mask = contour > 0

	    log_contour = contour.copy()
	    log_contour[log_mask] = np.log(contour[log_mask])
	    return uv, log_contour