first update
Browse files- README.md +5 -7
- app.py +40 -0
- requirements.txt +5 -0
README.md
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
license: mit
|
11 |
---
|
12 |
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Text to Speech Russian free multispeaker model
|
3 |
+
emoji: 🐠
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 4.19.2
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
|
app.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import VitsModel, AutoTokenizer, set_seed
|
3 |
+
import torch
|
4 |
+
import scipy.io.wavfile
|
5 |
+
from ruaccent import RUAccent
|
6 |
+
|
7 |
+
# Maps the speaker names offered in the UI to the integer speaker ids
# passed to the model as `speaker_id`.
speakers = {
    "man": 0,
    "woman": 1,
}

# Single source of truth for the checkpoint: the model and its tokenizer
# must be loaded from the same repository.
MODEL_ID = "utrobinmv/tts_ru_free_hf_vits_low_multispeaker"

model = VitsModel.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model.eval()  # inference only
set_seed(555)  # fixed seed so runs start from the same RNG state

# Text preprocessor applied to the input before tokenization.
accentizer = RUAccent()
accentizer.load(omograph_model_size='turbo', use_dictionary=True)
|
17 |
+
|
18 |
+
|
19 |
+
def generate_audio(speaker_name, text):
    """Synthesize speech for *text* and return the path of the WAV file.

    Args:
        speaker_name: Key of the module-level ``speakers`` mapping
            selecting the voice.
        text: Input text; it is run through ``accentizer.process_all``
            before tokenization.

    Returns:
        Path to a newly written WAV file containing the generated waveform.

    Raises:
        KeyError: If ``speaker_name`` is not a key of ``speakers``.
    """
    # Local import keeps this function self-contained.
    import tempfile

    text = accentizer.process_all(text)
    inputs = tokenizer(text, return_tensors="pt")
    inputs['speaker_id'] = speakers[speaker_name]

    with torch.no_grad():
        output = model(**inputs).waveform

    # Write to a unique file per call: a shared fixed "output.wav" would let
    # concurrent requests overwrite each other's audio before it is served.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name
    # The handle is closed above so the path can be reopened on any platform.
    scipy.io.wavfile.write(
        wav_path,
        rate=model.config.sampling_rate,
        data=output[0].cpu().numpy(),
    )

    return wav_path
|
30 |
+
|
31 |
+
# Voice selector. The options are the keys of `speakers`, passed as an
# explicit list rather than relying on dict iteration order/semantics.
speaker_dropdown = gr.Dropdown(
    choices=list(speakers),
    label="Speaker id",
    value='woman',
    info="Models are trained on 2 speakers",
    interactive=True,
)

# Two inputs (speaker, free text) feed generate_audio, whose returned
# file path is rendered by the "audio" output component.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[speaker_dropdown, "text"],
    outputs="audio",
    title="Text to Speech Russian free multispeaker model",
    description="Введите текст на русском языке, чтобы преобразовать его в русскую звуковую речь. Пример текста: Привет, как дела? А у тебя как?",
)
iface.launch(share=True)
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
scipy
|
3 |
+
sentencepiece==0.1.99
|
4 |
+
ruaccent==1.5.6.3
|
5 |
+
transformers==4.38.1
|