File size: 1,552 Bytes
d1a84ee
 
 
92e68f4
 
73eaac3
d1a84ee
75363d3
 
 
 
73eaac3
df1ad02
 
 
 
 
495433d
df1ad02
d1a84ee
 
 
df1ad02
 
 
495433d
d1a84ee
495433d
df1ad02
 
 
 
 
 
 
 
 
92e68f4
 
 
 
 
df1ad02
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import os

## build wavegru-cpp
# os.system("./bazelisk-linux-amd64 clean --expunge")
# os.system("./bazelisk-linux-amd64 build wavegru_mod -c opt --copt=-march=native")


import gradio as gr

from inference import load_tacotron_model, load_wavegru_net, mel_to_wav, text_to_mel
from wavegru_cpp import extract_weight_mask, load_wavegru_cpp

alphabet, tacotron_net, tacotron_config = load_tacotron_model(
    "./alphabet.txt", "./tacotron.toml", "./pretrained_model_ljs_500k.ckpt"
)


wavegru_config, wavegru_net = load_wavegru_net("./wavegru.yaml", "./wavegru.ckpt")

wave_cpp_weight_mask = extract_weight_mask(wavegru_net)
wavecpp = load_wavegru_cpp(wave_cpp_weight_mask)


def speak(text):
    mel = text_to_mel(tacotron_net, text, alphabet, tacotron_config)
    print(mel.shape)
    y = mel_to_wav(wavegru_net, wavecpp, mel, wavegru_config)
    print(y.shape)
    return 24_000, y


title = "WaveGRU-TTS"
description = "WaveGRU text-to-speech demo."

gr.Interface(
    fn=speak,
    inputs="text",
    examples=[
        "this is a test!",
        "October arrived, spreading a damp chill over the grounds and into the castle. Madam Pomfrey, the nurse, was kept busy by a sudden spate of colds among the staff and students.",
        "Artificial intelligence is intelligence demonstrated by machines, as opposed to natural intelligence displayed by animals including humans",
    ],
    outputs="audio",
    title=title,
    description=description,
    theme="default",
    allow_screenshot=False,
    allow_flagging="never",
).launch(debug=False)