utrobinmv committed on
Commit
871432d
1 Parent(s): 3239dab

first update

Browse files
Files changed (3) hide show
  1. README.md +5 -7
  2. app.py +40 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,13 +1,11 @@
1
  ---
2
- title: Tts Ru Free Hf Vits Low Multispeaker
3
- emoji: 😻
4
- colorFrom: gray
5
- colorTo: gray
6
  sdk: gradio
7
- sdk_version: 4.28.3
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Text to Speech Russian free multispeaker model
3
+ emoji: 🐠
4
+ colorFrom: purple
5
+ colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: 4.19.2
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
 
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import VitsModel, AutoTokenizer, set_seed
3
+ import torch
4
+ import scipy.io.wavfile
5
+ from ruaccent import RUAccent
6
+
7
# Maps the speaker name offered in the UI to the speaker id given to the model.
speakers = dict(man=0, woman=1)

# The tokenizer and the model weights are fetched from the same Hub repository.
_MODEL_REPO = "utrobinmv/tts_ru_free_hf_vits_low_multispeaker"
tokenizer = AutoTokenizer.from_pretrained(_MODEL_REPO)
model = VitsModel.from_pretrained(_MODEL_REPO).eval()  # switch to evaluation mode
set_seed(555)

# Accentizer applied to the input text before it is tokenised.
accentizer = RUAccent()
accentizer.load(omograph_model_size="turbo", use_dictionary=True)
17
+
18
+
19
def generate_audio(speaker_name, text):
    """Synthesise speech for ``text`` and return the path of the WAV file.

    Args:
        speaker_name: Key of the module-level ``speakers`` mapping that
            selects the voice.
        text: Text to speak; it is passed through
            ``accentizer.process_all`` before tokenisation.

    Returns:
        Path of a newly created ``.wav`` file holding the generated waveform.

    Raises:
        KeyError: If ``speaker_name`` is not a key of ``speakers``.
    """
    import tempfile  # local import: only this function needs it

    accented_text = accentizer.process_all(text)
    inputs = tokenizer(accented_text, return_tensors="pt")
    inputs['speaker_id'] = speakers[speaker_name]

    with torch.no_grad():
        output = model(**inputs).waveform

    # Write every result to its own temporary file. A fixed "output.wav"
    # would be shared by all requests, so two overlapping requests could
    # overwrite each other's audio before it is served.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        scipy.io.wavfile.write(
            wav_file,
            rate=model.config.sampling_rate,
            data=output[0].cpu().numpy(),
        )

    # NOTE(review): the file is deliberately not deleted here because the
    # caller still has to read it; confirm that something cleans the
    # temporary directory on long-running deployments.
    return wav_file.name
30
+
31
# Dropdown used to pick which voice (a key of ``speakers``) to synthesise with.
speaker_dropdown = gr.Dropdown(
    choices=list(speakers),  # the speaker names, as an explicit list
    label="Speaker id",
    value='woman',
    info="Models are trained on 2 speakers",  # plain string: nothing to interpolate
    interactive=True,
)

# The description is Russian for: "Enter text in Russian to convert it to
# Russian speech. Example text: Hi, how are you? And how about you?"
iface = gr.Interface(
    fn=generate_audio,
    inputs=[speaker_dropdown, "text"],
    outputs="audio",
    title="Text to Speech Russian free multispeaker model",
    description="Введите текст на русском языке, чтобы преобразовать его в русскую звуковую речь. Пример текста: Привет, как дела? А у тебя как?",
)
# NOTE(review): share=True is presumably unnecessary when hosted on Spaces —
# confirm before removing.
iface.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ scipy
3
+ sentencepiece==0.1.99
4
+ ruaccent==1.5.6.3
5
+ transformers==4.38.1