Spaces:
Runtime error
Runtime error
Serhiy Stetskovych
committed on
Commit
·
9cb2738
1
Parent(s):
5db46b8
Add list of prompts
Browse files
- app.py +11 -28
- prompt22050.wav +0 -0
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
from pathlib import Path
|
3 |
import torchaudio
|
4 |
import gradio as gr
|
@@ -34,23 +34,12 @@ VOCODER44_MODEL_PATH = 'patriotyk/vocos-mel-hifigan-compat-44100khz'
|
|
34 |
HIFIGAN_MODEL_PATH = 'checkpoints/g_00120000'
|
35 |
|
36 |
|
37 |
-
|
38 |
-
wav, sr = torchaudio.load('prompt22050.wav')
|
39 |
-
|
40 |
-
prompt = mel_spectrogram(
|
41 |
-
transform(wav),
|
42 |
-
1024,
|
43 |
-
80,
|
44 |
-
22050,
|
45 |
-
256,
|
46 |
-
1024,
|
47 |
-
0,
|
48 |
-
8000,
|
49 |
-
center=False,
|
50 |
-
)[:,:,:264]
|
51 |
-
|
52 |
|
53 |
|
|
|
|
|
|
|
54 |
|
55 |
def process_text(text: str, device: torch.device):
|
56 |
x = torch.tensor(
|
@@ -89,16 +78,6 @@ def load_vocos(checkpoint_path, config_path, device):
|
|
89 |
def to_waveform(mel, vocoder, denoiser=None):
|
90 |
return vocoder.decode(mel).clamp(-1, 1).cpu().squeeze()
|
91 |
|
92 |
-
# audio = vocoder(mel).clamp(-1, 1)
|
93 |
-
# if denoiser is not None:
|
94 |
-
# audio = denoiser(audio.squeeze(), strength=0.00025).cpu().squeeze()
|
95 |
-
|
96 |
-
# return audio.cpu().squeeze()
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
|
103 |
def get_device():
|
104 |
if torch.cuda.is_available():
|
@@ -123,11 +102,14 @@ denoiser = None#Denoiser(vocoder, mode="zeros")
|
|
123 |
|
124 |
|
125 |
@torch.inference_mode()
|
126 |
-
def synthesise(text, speed):
|
127 |
if len(text) > 1000:
|
128 |
raise gr.Error("Текст повинен бути коротшим за 1000 символів.")
|
129 |
|
130 |
text_processed = process_text(text.strip(), device)
|
|
|
|
|
|
|
131 |
|
132 |
output = model.synthesise(
|
133 |
text_processed["x"].to(device),
|
@@ -165,7 +147,8 @@ if __name__ == "__main__":
|
|
165 |
description=description,
|
166 |
inputs=[
|
167 |
gr.Text(label='Текст для синтезу:', lines=5, max_lines=10),
|
168 |
-
gr.
|
|
|
169 |
],
|
170 |
outputs=[
|
171 |
gr.Text(label='Фонемізований текст:', lines=5),
|
|
|
1 |
+
import os
|
2 |
from pathlib import Path
|
3 |
import torchaudio
|
4 |
import gradio as gr
|
|
|
34 |
HIFIGAN_MODEL_PATH = 'checkpoints/g_00120000'
|
35 |
|
36 |
|
37 |
+
volnorm = torchaudio.transforms.Vol(gain=-32, gain_type="db")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
+
prompts_dir = 'prompts'
|
41 |
+
print(os.listdir(prompts_dir))
|
42 |
+
prompts_list = sorted(os.listdir(prompts_dir), key=lambda x: x.split('.')[0])
|
43 |
|
44 |
def process_text(text: str, device: torch.device):
|
45 |
x = torch.tensor(
|
|
|
78 |
def to_waveform(mel, vocoder, denoiser=None):
|
79 |
return vocoder.decode(mel).clamp(-1, 1).cpu().squeeze()
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
def get_device():
|
83 |
if torch.cuda.is_available():
|
|
|
102 |
|
103 |
|
104 |
@torch.inference_mode()
|
105 |
+
def synthesise(text, prompt_selection, speed):
|
106 |
if len(text) > 1000:
|
107 |
raise gr.Error("Текст повинен бути коротшим за 1000 символів.")
|
108 |
|
109 |
text_processed = process_text(text.strip(), device)
|
110 |
+
prompt_audio_path = os.path.join(prompts_dir, prompt_selection)
|
111 |
+
wav, sr = torchaudio.load(prompt_audio_path)
|
112 |
+
prompt = mel_spectrogram(volnorm(wav), 1024, 80, 22050, 256, 1024, 0, 8000, center=False)[:,:,:264]
|
113 |
|
114 |
output = model.synthesise(
|
115 |
text_processed["x"].to(device),
|
|
|
147 |
description=description,
|
148 |
inputs=[
|
149 |
gr.Text(label='Текст для синтезу:', lines=5, max_lines=10),
|
150 |
+
gr.Dropdown(label="Prompt audio", choices=prompts_list, value=prompts_list[0]),
|
151 |
+
gr.Slider(minimum=0.6, maximum=2.0, label="Швидкість", value=1.1)
|
152 |
],
|
153 |
outputs=[
|
154 |
gr.Text(label='Фонемізований текст:', lines=5),
|
prompt22050.wav
DELETED
Binary file (655 kB)
|
|