Spaces:
Paused
Paused
import torch | |
from transformers import pipeline | |
import numpy as np | |
import gradio as gr | |
def _grab_best_device(use_gpu=True):
    """Pick the torch device string to run inference on.

    Args:
        use_gpu: When truthy, prefer CUDA if torch reports at least one
            CUDA device.

    Returns:
        ``"cuda"`` when a CUDA device is present and ``use_gpu`` is truthy,
        otherwise ``"cpu"``.
    """
    cuda_present = torch.cuda.device_count() > 0
    return "cuda" if cuda_present and use_gpu else "cpu"
# Resolve the inference device once at import time ("cuda" or "cpu").
device = _grab_best_device()

# Hub repository of the text-to-speech checkpoint served by this demo.
HUB_PATH = "ylacombe/vits_vctk_welsh_male"

# Load the pipeline on the detected device (not a fixed GPU index) so the app
# also starts on hosts without CUDA.
pipe = pipeline("text-to-speech", model=HUB_PATH, device=device)

# Markdown shown at the top of the demo page.
title = "# 🐶 VITS"

description = """
"""

# Speaker count read from the model config; it drives both the inference loop
# and the number of audio widgets in the UI.
num_speakers = pipe.model.config.num_speakers
# Inference
def generate_audio(text):
    """Synthesise ``text`` once for every speaker of the loaded model.

    Args:
        text: Input handed to the text-to-speech pipeline.

    Returns:
        A list with one ``(sampling_rate, audio)`` tuple per speaker id in
        ``range(num_speakers)``, in speaker-id order. ``audio`` is the
        pipeline's ``"audio"`` output with ``.squeeze()`` applied.
    """

    def _synthesise(speaker_id):
        # The speaker is selected through the pipeline's forward parameters.
        result = pipe(text, forward_params={"speaker_id": speaker_id})
        return result["sampling_rate"], result["audio"].squeeze()

    return [_synthesise(speaker_id) for speaker_id in range(num_speakers)]
# Gradio blocks demo
# Layout: title and description on top, then a row with the text input and
# button on the left and one audio player per speaker on the right.
with gr.Blocks() as demo_blocks:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            # NOTE(review): the info text mentions "bark" although the loaded
            # checkpoint is a VITS model; confirm the intended wording.
            inp_text = gr.Textbox(label="Input Text", info="What would you like bark to synthesise?")
            btn = gr.Button("Generate Audio!")
        with gr.Column():
            # One audio widget per speaker, in the same order in which
            # generate_audio returns its (sampling_rate, audio) tuples.
            outputs = []
            for i in range(num_speakers):
                out_audio = gr.Audio(type="numpy", autoplay=False, label=f"Generated Audio {i}", show_label=True)
                outputs.append(out_audio)
    # Pass the flat list of components: each element of generate_audio's
    # return value is routed to the Audio widget at the same position.
    btn.click(generate_audio, [inp_text], outputs)

demo_blocks.launch()