# Requirement: transformers_js_py
from transformers_js_py import import_transformers_js
import gradio as gr
import numpy as np
transformers_js = await import_transformers_js("3.0.2")
pipeline = transformers_js.pipeline
synthesizer = await pipeline(
'text-to-speech',
'Xenova/speecht5_tts',
{ "quantized": False }
)
speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
async def synthesize(text):
    """Convert ``text`` to speech and return it as 16-bit PCM audio.

    Args:
        text: The text to speak.

    Returns:
        A ``(sampling_rate, samples)`` tuple. ``sampling_rate`` is taken
        unchanged from the pipeline output and ``samples`` is a
        one-dimensional ``numpy.int16`` array.
    """
    out = await synthesizer(text, {"speaker_embeddings": speaker_embeddings})
    sampling_rate = out["sampling_rate"]

    # The "audio" entry of the pipeline output is interpreted as a raw
    # float32 sample buffer.
    samples = np.frombuffer(out["audio"], dtype=np.float32)

    # Clamp to [-1.0, 1.0] before scaling. Without this, any sample that
    # overshoots the nominal range would not fit in int16 after scaling and
    # would wrap around instead of saturating, producing loud clicks.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

    return sampling_rate, pcm
# Expose the synthesizer through a minimal Gradio UI: a text box as input
# and an audio component as output.
demo = gr.Interface(fn=synthesize, inputs="textbox", outputs="audio")
demo.launch()