# text-to-speech / libs / audio.py
# lojban's picture
# save Nix-Stochastic as ogg
import numpy as np
import pydub
from re import sub
def float2pcm(sig, dtype='int16'):
    """Convert a floating point signal with a range from -1 to 1 to PCM.

    Each sample is multiplied by ``2 ** (bits - 1)``, shifted by the offset
    an unsigned target type needs, clipped to the representable range of
    *dtype* and then cast (the cast truncates toward zero).  Any signal
    values outside the interval [-1.0, 1.0) are therefore clipped.
    No dithering is used.

    Note that there are different possibilities for scaling floating
    point numbers to PCM numbers; this function implements just one of
    them.

    Parameters
    ----------
    sig : array_like
        Input array, must have floating point type.
    dtype : data type, optional
        Desired (integer) data type.

    Returns
    -------
    numpy.ndarray
        Integer data, scaled and clipped to the range of the given
        *dtype*.

    Raises
    ------
    TypeError
        If *sig* is not a floating point array or *dtype* is not an
        integer type.
    """
    sig = np.asarray(sig)
    if sig.dtype.kind != 'f':
        raise TypeError("'sig' must be a float array")
    dtype = np.dtype(dtype)
    if dtype.kind not in 'iu':
        raise TypeError("'dtype' must be an integer type")

    i = np.iinfo(dtype)

    # A float type holds every integer up to 2 ** (nmant + 1) exactly.  If the
    # target has more bits than that (float32 -> int32, float16 -> int16),
    # i.max rounds up to 2 ** (bits - 1) in the input precision, the clip
    # lets full-scale samples through and the final cast overflows.  Doing
    # the arithmetic in at least float64 avoids this for targets of up to
    # 53 bits; 64-bit targets remain limited by float64 precision.
    if i.bits > np.finfo(sig.dtype).nmant + 1:
        sig = sig.astype(np.result_type(sig.dtype, np.float64))

    abs_max = 2 ** (i.bits - 1)
    # Zero for signed types; mid-scale (abs_max) for unsigned types.
    offset = i.min + abs_max
    return (sig * abs_max + offset).clip(i.min, i.max).astype(dtype)
def strip_text(text: str) -> str:
    """Return *text* with every character removed that is not an ASCII
    letter, an ASCII digit or a space."""
    unwanted = r"[^a-zA-Z0-9 ]"
    cleaned = sub(pattern=unwanted, repl="", string=text)
    return cleaned
def wav2ogg(x, sr, text, language, normalized=True):
    """Encode a numpy sample array as an Ogg Vorbis file and return its path.

    Parameters
    ----------
    x : numpy.ndarray
        Audio samples.  An array of shape ``(n, 2)`` is written as two
        channels; any other shape is written as a single channel.
    sr : int
        Frame rate handed to ``pydub.AudioSegment``.
    text : str
        Text used to build the output file name (after ``strip_text``).
    language : str
        Prefix of the output file name.  It is inserted into the path
        as-is, so callers should not pass untrusted values here.
    normalized : bool, optional
        If true, *x* holds values in [-1, 1) that are scaled to 16-bit PCM
        (out-of-range values are clipped).  If false, *x* is taken to
        already be on the 16-bit scale and is only cast to ``int16``.

    Returns
    -------
    str
        Path of the written ``.ogg`` file under ``/tmp``.
    """
    import hashlib  # local import: only needed for over-long file names

    channels = 2 if (x.ndim == 2 and x.shape[1] == 2) else 1
    if normalized:
        # float2pcm clips to the int16 range instead of letting +1.0 wrap
        # around to -32768; converting to float64 first keeps accepting the
        # non-float arrays that the plain multiplication accepted.
        y = float2pcm(np.asarray(x, dtype=np.float64), 'int16')
    else:
        y = np.int16(x)
    song = pydub.AudioSegment(y.tobytes(), frame_rate=sr, sample_width=2, channels=channels)

    # File names are commonly limited to 255 bytes.  Names that fit are left
    # exactly as before; longer ones are truncated and given a short hash of
    # the full stripped text so different long texts do not collide.
    stem = strip_text(text)
    prefix = f"{language}-"
    budget = 255 - len(prefix.encode("utf-8", "replace")) - len(".ogg")
    if len(stem) > budget:
        digest = hashlib.sha1(stem.encode("ascii")).hexdigest()[:8]
        stem = f"{stem[:max(budget - len(digest) - 1, 0)]}-{digest}"

    path = f"/tmp/{prefix}{stem}.ogg"
    song.export(path, format="ogg", codec="libvorbis")
    return path