File size: 1,488 Bytes
96094ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import torch

# `set_seed` must be imported explicitly — it was previously called below
# without being in scope, which raised NameError at startup.
from transformers import pipeline, set_seed

import numpy as np
import gradio as gr

# Seed all RNGs (python, numpy, torch) for reproducible synthesis output.
set_seed(0)

def _grab_best_device(use_gpu=True):
    if torch.cuda.device_count() > 0 and use_gpu:
        device = "cuda"
    else:
        device = "cpu"
    return device

device = _grab_best_device()

HUB_PATH = "ylacombe/vits_vctk_welsh_male"
# Pass the checkpoint as `model=` (the first positional argument of
# `pipeline` is the task name, not a model id) and use the device chosen
# above instead of hard-coding GPU index 0, so CPU-only hosts still work.
pipe = pipeline(model=HUB_PATH, device=device)

title = "# 🐶 VITS"

description = """

"""

# Number of speaker embeddings the checkpoint was trained with; drives the
# speaker-id dropdown below.
num_speakers = pipe.model.config.num_speakers

# Inference
def generate_audio(text, spkr_id):
    """Synthesize speech for `text` with the selected speaker.

    Args:
        text: Input text to synthesise.
        spkr_id: Integer speaker embedding index (may be None for the
            model default / unconditional generation).

    Returns:
        Tuple of (sampling_rate, 1-D audio array) as expected by `gr.Audio`.
    """
    # VitsModel.forward takes `speaker_id` (not `spkr_id`), and we must call
    # the instantiated `pipe` — the original called the `pipeline` factory
    # function, which treats `text` as a task name and crashes.
    forward_params = {"speaker_id": spkr_id}
    output = pipe(text, forward_params=forward_params)

    return (output["sampling_rate"], output["audio"].squeeze())


# Gradio blocks demo    
with gr.Blocks() as demo_blocks:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        with gr.Column():
            inp_text = gr.Textbox(label="Input Text", info="What would you like bark to synthesise?")
            spkr = gr.Dropdown(
                    [i for i in range(num_speakers)],
                    value=None, 
                    label="Speaker ID", 
                    info="Default: Unconditional Generation"
                    )
            btn = gr.Button("Generate Audio!")
    
        with gr.Column():
            out_audio_vocos = gr.Audio(type="numpy", autoplay=False, label="Generated Audio", show_label=True)
    
    btn.click(generate_audio, [inp_text, spk], [out_audio_vocos])

demo_blocks.queue().launch(debug=True)