# Spaces: Running
from re import sub

import numpy as np
import pydub
def float2pcm(sig, dtype='int16'):
    """Convert a floating point signal with a range from -1 to 1 to PCM.

    Any signal values outside the interval [-1.0, 1.0) are clipped.
    No dithering is used.

    Note that there are different possibilities for scaling floating
    point numbers to PCM numbers; this function implements just one of
    them.  For an overview of alternatives see
    http://blog.bjornroche.com/2009/12/int-float-int-its-jungle-out-there.html

    Parameters
    ----------
    sig : array_like
        Input array, must have floating point type.
    dtype : data type, optional
        Desired (integer) data type.

    Returns
    -------
    numpy.ndarray
        Integer data, scaled and clipped to the range of the given
        *dtype*.

    Raises
    ------
    TypeError
        If *sig* is not a floating point array or *dtype* is not an
        integer type.
    """
    sig = np.asarray(sig)
    if sig.dtype.kind != 'f':
        raise TypeError("'sig' must be a float array")
    dtype = np.dtype(dtype)
    if dtype.kind not in 'iu':
        raise TypeError("'dtype' must be an integer type")

    info = np.iinfo(dtype)
    abs_max = 2 ** (info.bits - 1)
    offset = info.min + abs_max

    # Scale in at least float64.  In float32 the int32 upper bound rounds
    # up to 2**31, so a full-scale sample would pass the clip unchanged and
    # then overflow in the integer cast.
    work_dtype = np.promote_types(sig.dtype, np.float64)
    scaled = sig.astype(work_dtype) * float(abs_max) + float(offset)

    lower = float(info.min)  # zero or a negative power of two: always exact
    upper = float(info.max)
    if upper > info.max:
        # 64-bit targets: info.max is not representable and rounded up, so
        # step down to the largest float that still fits in *dtype*.
        upper = float(np.nextafter(upper, -np.inf))

    return scaled.clip(lower, upper).astype(dtype)
def strip_text(text: str) -> str:
    """Return *text* keeping only ASCII letters, ASCII digits and spaces."""
    kept = [
        ch for ch in text
        if ch == " " or (ch.isascii() and ch.isalnum())
    ]
    return "".join(kept)
def wav2ogg(x, sr, text, language, normalized=True):
    """Encode a numpy audio array as an Ogg Vorbis file and return its path.

    Parameters
    ----------
    x : array_like
        Audio samples.  A 2-D array whose second dimension is 2 is written
        as stereo; anything else is written as a single channel.
    sr : int
        Sample rate, passed to pydub as ``frame_rate``.
    text : str
        Text that, after `strip_text`, forms part of the file name.
    language : str
        Prefix of the file name.
    normalized : bool, optional
        If true, *x* is expected to hold floats in [-1, 1) and is scaled
        by 2**15; otherwise *x* is taken to be on the int16 scale already.

    Returns
    -------
    str
        Path of the written ``.ogg`` file under ``/tmp``.
    """
    print(x, sr, text, language)

    samples = np.asarray(x)
    channels = 2 if (samples.ndim == 2 and samples.shape[1] == 2) else 1

    # Scale in float64 and clip before the cast: a plain np.int16(...)
    # wraps around, turning a sample of 1.0 (-> 32768) into -32768.
    scaled = samples.astype(np.float64)
    if normalized:
        scaled = scaled * 2 ** 15
    pcm = np.clip(scaled, -32768, 32767).astype(np.int16)

    song = pydub.AudioSegment(
        pcm.tobytes(), frame_rate=sr, sample_width=2, channels=channels
    )

    # Keep the file name within 255 bytes (the usual per-component limit on
    # Linux filesystems) so long texts do not fail with "File name too long".
    # Shorter names are left exactly as before.
    slug = strip_text(text)
    room = 255 - len(f"{language}-.ogg".encode())
    slug = slug[:max(room, 0)]
    # NOTE(review): `language` is interpolated unsanitized; confirm callers
    # never pass untrusted values containing path separators.
    path = f"/tmp/{language}-{slug}.ogg"

    # export() hands back the open output file; close it explicitly.
    song.export(path, format="ogg", codec="libvorbis").close()
    return path