File size: 2,606 Bytes
d1a84ee
41ba53f
92e68f4
 
73eaac3
3dbfd73
 
 
 
d1a84ee
75363d3
 
 
73eaac3
df1ad02
7383c33
 
a28cfa9
7383c33
df1ad02
7383c33
a28cfa9
7383c33
d1a84ee
7383c33
 
 
 
df1ad02
 
d1a84ee
df1ad02
 
 
 
 
 
 
 
 
92e68f4
5ec3478
 
 
 
 
 
92e68f4
41ba53f
5ec3478
92e68f4
df1ad02
 
 
 
 
 
d327a94
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
## build wavegru-cpp (disabled; uncomment the lines below to rebuild the wavegru_mod target with bazelisk)
# import os
# os.system("./bazelisk-linux-amd64 clean --expunge")
# os.system("./bazelisk-linux-amd64 build wavegru_mod -c opt --copt=-march=native")

# install espeak
import os
import sys

# Run the bundled installer script at start-up. os.system returns the
# command's status; anything non-zero means the script did not succeed.
_espeak_install_status = os.system("bash ./install_espeak_ng.sh")
if _espeak_install_status != 0:
    # Start-up stays best-effort (the app still launches, as before), but the
    # failure is made visible so a later synthesis error can be traced back
    # to the installer instead of failing silently.
    print(
        "warning: ./install_espeak_ng.sh exited with status "
        f"{_espeak_install_status}",
        file=sys.stderr,
    )

import gradio as gr
from inference import load_tacotron_model, load_wavegru_net, mel_to_wav, text_to_mel
from wavegru_cpp import extract_weight_mask, load_wavegru_cpp


# Lazily-populated cache of everything `speak` needs that does not depend on
# the input text. Loading checkpoints on every request is pure repeated work.
_MODELS = None


def _load_models():
    """Load the Tacotron and WaveGRU models once and reuse them afterwards.

    Returns:
        A tuple ``(alphabet, tacotron_net, tacotron_config, wavegru_config,
        wavegru_net, wavecpp)`` built from the checkpoint/config files in the
        current working directory.
    """
    global _MODELS
    if _MODELS is None:
        alphabet, tacotron_net, tacotron_config = load_tacotron_model(
            "./alphabet.txt", "./tacotron.toml", "./tacotrons_ljs_24k_v1_0300000.ckpt"
        )

        wavegru_config, wavegru_net = load_wavegru_net(
            "./wavegru.yaml", "./wavegru_vocoder_1024_v3_1810000.ckpt"
        )

        # The C++ vocoder is built from the weights/masks of the loaded net
        # and the last entry of the configured upsample factors.
        wave_cpp_weight_mask = extract_weight_mask(wavegru_net)
        wavecpp = load_wavegru_cpp(
            wave_cpp_weight_mask, wavegru_config["upsample_factors"][-1]
        )

        # NOTE(review): these objects are now shared across requests; this
        # assumes the loaded nets and `wavecpp` are reusable between calls --
        # confirm if requests can run concurrently.
        _MODELS = (
            alphabet,
            tacotron_net,
            tacotron_config,
            wavegru_config,
            wavegru_net,
            wavecpp,
        )
    return _MODELS


def speak(text):
    """Synthesize speech for ``text``.

    The text is converted to a mel representation with the Tacotron model,
    which is then turned into a waveform by the WaveGRU vocoder.

    Args:
        text: The input text to synthesize.

    Returns:
        A tuple ``(24_000, y)`` where ``y`` is the output of ``mel_to_wav``
        (presumably the waveform samples at 24 kHz -- verify against
        ``inference.mel_to_wav``).
    """
    (
        alphabet,
        tacotron_net,
        tacotron_config,
        wavegru_config,
        wavegru_net,
        wavecpp,
    ) = _load_models()

    mel = text_to_mel(tacotron_net, text, alphabet, tacotron_config)
    y = mel_to_wav(wavegru_net, wavecpp, mel, wavegru_config)
    return 24_000, y


# Text shown at the top of the demo page.
title = "WaveGRU-TTS"
description = "WaveGRU text-to-speech demo."

# Sample sentences offered as one-click inputs in the UI.
example_sentences = [
    "This is a test!",
    "President Trump met with other leaders at the Group of 20 conference.",
    "The buses aren't the problem, they actually provide a solution.",
    "Generative adversarial network or variational auto-encoder.",
    "Basilar membrane and otolaryngology are not auto-correlations.",
    "There are several variations on the full gated unit, with gating done using the previous hidden state and the bias in various combinations, and a simplified form called minimal gated unit.",
    "October arrived, spreading a damp chill over the grounds and into the castle. Madam Pomfrey, the nurse, was kept busy by a sudden spate of colds among the staff and students.",
    "Artificial intelligence is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans.",
    'Uncle Vernon entered the kitchen as Harry was turning over the bacon. "Comb your hair!" he barked, by way of a morning greeting. About once a week, Uncle Vernon looked over the top of his newspaper and shouted that Harry needed a haircut. Harry must have had more haircuts than the rest of the boys in his class put together, but it made no difference, his hair simply grew that way - all over the place.',
]

# Text in, audio out: wire `speak` into a Gradio interface, then start it.
demo = gr.Interface(
    fn=speak,
    inputs="text",
    outputs="audio",
    examples=example_sentences,
    title=title,
    description=description,
    theme="default",
    allow_screenshot=False,
    allow_flagging="never",
)
demo.launch()