Spaces:

lojban
/

text-to-speech

Running

App Files Files Community

text-to-speech / libs /audio.py

lojban

save Nix-Stochastic as ogg

e85d807 over 2 years ago

raw

history blame contribute delete

1.94 kB

	import numpy as np
	import pydub
	from re import sub

	def float2pcm(sig, dtype='int16'):
	    """Convert a floating point signal in the range [-1, 1) to integer PCM.

	    Any signal values outside the interval [-1.0, 1.0) are clipped.
	    No dithering is used.

	    Note that there are different possibilities for scaling floating
	    point numbers to PCM numbers; this function implements just one of
	    them. For an overview of alternatives see
	    http://blog.bjornroche.com/2009/12/int-float-int-its-jungle-out-there.html

	    Parameters
	    ----------
	    sig : array_like
	        Input array, must have floating point type.
	    dtype : data type, optional
	        Desired (integer) data type, signed or unsigned.

	    Returns
	    -------
	    numpy.ndarray
	        Integer data, scaled and clipped to the range of the given
	        dtype. For 64-bit targets the positive full-scale value is the
	        largest float64 that does not exceed the integer maximum.

	    Raises
	    ------
	    TypeError
	        If `sig` is not a floating point array or `dtype` is not an
	        integer type.
	    """
	    sig = np.asarray(sig)
	    if sig.dtype.kind != 'f':
	        raise TypeError("'sig' must be a float array")
	    dtype = np.dtype(dtype)
	    if dtype.kind not in 'iu':
	        raise TypeError("'dtype' must be an integer type")

	    i = np.iinfo(dtype)
	    abs_max = 2 ** (i.bits - 1)
	    # Unsigned targets are shifted so that 0.0 lands on the mid-point.
	    offset = i.min + abs_max

	    # Scale in at least float64: float16/float32 cannot represent the
	    # bounds of the wider integer types, so clipping in the input's own
	    # precision would let full-scale samples overflow in the final cast.
	    work_dtype = np.promote_types(sig.dtype, np.float64)
	    scaled = sig.astype(work_dtype, copy=False) * abs_max + offset

	    lower = float(i.min)
	    upper = float(i.max)
	    if upper > i.max:
	        # The integer maximum rounded *up* when converted to float (64-bit
	        # targets); step down to the nearest float that is still in range.
	        upper = np.nextafter(upper, 0.0)
	    return scaled.clip(lower, upper).astype(dtype)

	def strip_text(text: str) -> str:
	    """Return `text` with every character removed except ASCII letters, digits and spaces."""
	    # Negated class: anything that is NOT a-z, A-Z, 0-9 or a space is dropped.
	    unwanted = r"[^a-zA-Z0-9 ]"
	    cleaned = sub(pattern=unwanted, repl="", string=text)
	    return cleaned

	def wav2ogg(x, sr, text, language, normalized=True):
	    """Encode a sample array as an Ogg Vorbis file and return the file path.

	    Parameters
	    ----------
	    x : array_like
	        Audio samples. An array of shape (n, 2) is written as two
	        channels; any other shape is written as a single channel.
	    sr : int
	        Sample rate passed to pydub as the frame rate.
	    text : str
	        Text used (after `strip_text`) to build the output file name.
	    language : str
	        Prefix of the output file name.
	    normalized : bool, optional
	        If True, `x` holds floats in [-1, 1) that are scaled to 16-bit
	        PCM. If False, `x` is assumed to already be on the int16 scale.

	    Returns
	    -------
	    str
	        Path of the exported ``.ogg`` file under ``/tmp``.
	    """
	    samples = np.asarray(x, dtype=np.float64)
	    channels = 2 if (samples.ndim == 2 and samples.shape[1] == 2) else 1
	    if normalized:
	        samples = samples * 2 ** 15
	    # Saturate instead of letting out-of-range samples (e.g. exactly 1.0)
	    # wrap around to the opposite polarity in the int16 cast.
	    pcm = samples.clip(-2 ** 15, 2 ** 15 - 1).astype(np.int16)
	    song = pydub.AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=channels)
	    # Keep the file name well below the usual 255-byte limit so that long
	    # utterances do not make the export fail.
	    max_stem_len = 200
	    stem = strip_text(text)[:max_stem_len]
	    # NOTE(review): `language` is interpolated as-is; confirm callers only
	    # pass trusted values (no path separators).
	    path = f"/tmp/{language}-{stem}.ogg"
	    song.export(path, format="ogg", codec="libvorbis")
	    return path